huggingface-hub 0.26.2__py3-none-any.whl → 0.27.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +49 -23
- huggingface_hub/_commit_scheduler.py +30 -4
- huggingface_hub/_local_folder.py +0 -4
- huggingface_hub/_login.py +38 -54
- huggingface_hub/_snapshot_download.py +6 -3
- huggingface_hub/_tensorboard_logger.py +2 -3
- huggingface_hub/_upload_large_folder.py +1 -1
- huggingface_hub/errors.py +19 -0
- huggingface_hub/fastai_utils.py +3 -2
- huggingface_hub/file_download.py +10 -12
- huggingface_hub/hf_api.py +102 -498
- huggingface_hub/hf_file_system.py +274 -35
- huggingface_hub/hub_mixin.py +5 -25
- huggingface_hub/inference/_client.py +185 -136
- huggingface_hub/inference/_common.py +2 -2
- huggingface_hub/inference/_generated/_async_client.py +186 -137
- huggingface_hub/inference/_generated/types/__init__.py +31 -10
- huggingface_hub/inference/_generated/types/audio_classification.py +3 -5
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +4 -8
- huggingface_hub/inference/_generated/types/chat_completion.py +8 -5
- huggingface_hub/inference/_generated/types/depth_estimation.py +1 -1
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -6
- huggingface_hub/inference/_generated/types/feature_extraction.py +1 -1
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -4
- huggingface_hub/inference/_generated/types/image_classification.py +3 -5
- huggingface_hub/inference/_generated/types/image_segmentation.py +2 -4
- huggingface_hub/inference/_generated/types/image_to_image.py +2 -4
- huggingface_hub/inference/_generated/types/image_to_text.py +4 -8
- huggingface_hub/inference/_generated/types/object_detection.py +2 -4
- huggingface_hub/inference/_generated/types/question_answering.py +2 -4
- huggingface_hub/inference/_generated/types/sentence_similarity.py +1 -1
- huggingface_hub/inference/_generated/types/summarization.py +2 -4
- huggingface_hub/inference/_generated/types/table_question_answering.py +21 -3
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -4
- huggingface_hub/inference/_generated/types/text_classification.py +4 -10
- huggingface_hub/inference/_generated/types/text_to_audio.py +6 -10
- huggingface_hub/inference/_generated/types/text_to_image.py +2 -4
- huggingface_hub/inference/_generated/types/text_to_speech.py +6 -10
- huggingface_hub/inference/_generated/types/token_classification.py +11 -12
- huggingface_hub/inference/_generated/types/translation.py +2 -4
- huggingface_hub/inference/_generated/types/video_classification.py +3 -4
- huggingface_hub/inference/_generated/types/visual_question_answering.py +2 -5
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +8 -18
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +9 -19
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +7 -9
- huggingface_hub/keras_mixin.py +3 -2
- huggingface_hub/lfs.py +2 -5
- huggingface_hub/repocard_data.py +4 -4
- huggingface_hub/serialization/__init__.py +2 -0
- huggingface_hub/serialization/_dduf.py +387 -0
- huggingface_hub/serialization/_torch.py +407 -25
- huggingface_hub/utils/_cache_manager.py +1 -1
- huggingface_hub/utils/_datetime.py +14 -9
- huggingface_hub/utils/_headers.py +9 -25
- huggingface_hub/utils/tqdm.py +15 -0
- {huggingface_hub-0.26.2.dist-info → huggingface_hub-0.27.0rc0.dist-info}/METADATA +8 -3
- {huggingface_hub-0.26.2.dist-info → huggingface_hub-0.27.0rc0.dist-info}/RECORD +61 -61
- {huggingface_hub-0.26.2.dist-info → huggingface_hub-0.27.0rc0.dist-info}/WHEEL +1 -1
- huggingface_hub/_multi_commits.py +0 -306
- {huggingface_hub-0.26.2.dist-info → huggingface_hub-0.27.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.26.2.dist-info → huggingface_hub-0.27.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.26.2.dist-info → huggingface_hub-0.27.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_generated/_async_client.py

@@ -56,17 +56,24 @@ from huggingface_hub.inference._generated.types import (
     AutomaticSpeechRecognitionOutput,
     ChatCompletionInputGrammarType,
     ChatCompletionInputStreamOptions,
-
+    ChatCompletionInputTool,
+    ChatCompletionInputToolChoiceClass,
+    ChatCompletionInputToolChoiceEnum,
     ChatCompletionOutput,
     ChatCompletionStreamOutput,
     DocumentQuestionAnsweringOutputElement,
     FillMaskOutputElement,
     ImageClassificationOutputElement,
+    ImageClassificationOutputTransform,
     ImageSegmentationOutputElement,
+    ImageSegmentationSubtask,
+    ImageToImageTargetSize,
     ImageToTextOutput,
     ObjectDetectionOutputElement,
+    Padding,
     QuestionAnsweringOutputElement,
     SummarizationOutput,
+    SummarizationTruncationStrategy,
     TableQuestionAnsweringOutputElement,
     TextClassificationOutputElement,
     TextClassificationOutputTransform,
@@ -75,9 +82,10 @@ from huggingface_hub.inference._generated.types import (
     TextGenerationStreamOutput,
     TextToImageTargetSize,
     TextToSpeechEarlyStoppingEnum,
+    TokenClassificationAggregationStrategy,
     TokenClassificationOutputElement,
-    ToolElement,
     TranslationOutput,
+    TranslationTruncationStrategy,
     VisualQuestionAnsweringOutputElement,
     ZeroShotClassificationOutputElement,
     ZeroShotImageClassificationOutputElement,
@@ -170,7 +178,9 @@ class AsyncInferenceClient:
 
         self.model: Optional[str] = model
         self.token: Union[str, bool, None] = token if token is not None else api_key
-        self.headers = CaseInsensitiveDict(
+        self.headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
+            build_hf_headers(token=self.token)  # 'authorization' + 'user-agent'
+        )
         if headers is not None:
             self.headers.update(headers)
         self.cookies = cookies
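The constructor change above makes the default header set explicit: `build_hf_headers` supplies the 'authorization' and 'user-agent' entries first, then any caller-supplied `headers` are merged on top. A minimal sketch of the resulting behavior (token and header values are placeholders, not from this diff):

```py
from huggingface_hub import AsyncInferenceClient

# Defaults from build_hf_headers() come first; the `headers` argument
# then overrides or extends them. Lookups are case-insensitive.
client = AsyncInferenceClient(
    token="hf_xxx",  # placeholder token
    headers={"X-Custom-Header": "my-value"},
)
print(client.headers["x-custom-header"])  # -> "my-value"
```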
@@ -317,7 +327,7 @@ class AsyncInferenceClient:
                 logger.info(f"Waiting for model to be loaded on the server: {error}")
                 if "X-wait-for-model" not in headers and url.startswith(INFERENCE_ENDPOINT):
                     headers["X-wait-for-model"] = "1"
-
+                await asyncio.sleep(1)
                 if timeout is not None:
                     timeout = max(self.timeout - (time.time() - t0), 1)  # type: ignore
                 continue
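The removed line (content lost in extraction) was presumably a blocking sleep; `await asyncio.sleep(1)` yields to the event loop instead, so other coroutines keep running while the client waits for the model to load. A toy illustration of the difference (not from the package):

```py
import asyncio

async def heartbeat() -> None:
    for _ in range(3):
        print("event loop still responsive")
        await asyncio.sleep(0.5)

async def wait_for_model() -> None:
    # A blocking time.sleep(1) here would stall heartbeat();
    # asyncio.sleep suspends only this coroutine.
    await asyncio.sleep(1)
    print("retrying request")

async def main() -> None:
    await asyncio.gather(heartbeat(), wait_for_model())

asyncio.run(main())
```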
@@ -374,7 +384,7 @@ class AsyncInferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
             function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
-                The function to apply to the
+                The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
             `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -502,9 +512,9 @@ class AsyncInferenceClient:
         stop: Optional[List[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
-        tool_choice: Optional[Union[
+        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[
+        tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
     ) -> ChatCompletionOutput: ...
@@ -527,9 +537,9 @@ class AsyncInferenceClient:
         stop: Optional[List[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
-        tool_choice: Optional[Union[
+        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[
+        tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
     ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
@@ -552,9 +562,9 @@ class AsyncInferenceClient:
         stop: Optional[List[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
-        tool_choice: Optional[Union[
+        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[
+        tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
@@ -577,9 +587,9 @@ class AsyncInferenceClient:
         stop: Optional[List[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
-        tool_choice: Optional[Union[
+        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[
+        tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
@@ -618,7 +628,7 @@ class AsyncInferenceClient:
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
                 probabilities of each output token returned in the content of message.
             max_tokens (`int`, *optional*):
-                Maximum number of tokens allowed in the response. Defaults to
+                Maximum number of tokens allowed in the response. Defaults to 100.
             n (`int`, *optional*):
                 UNUSED.
             presence_penalty (`float`, *optional*):
@@ -645,11 +655,11 @@ class AsyncInferenceClient:
             top_p (`float`, *optional*):
                 Fraction of the most likely next words to sample from.
                 Must be between 0 and 1. Defaults to 1.0.
-            tool_choice ([`
+            tool_choice ([`ChatCompletionInputToolChoiceClass`] or [`ChatCompletionInputToolChoiceEnum`], *optional*):
                 The tool to use for the completion. Defaults to "auto".
             tool_prompt (`str`, *optional*):
                 A prompt to be appended before the tools.
-            tools (List of [`
+            tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
 
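A minimal sketch of the renamed tool parameters in use; the model and tool schema here are illustrative, not part of this diff. Plain dicts matching the `ChatCompletionInputTool` shape are accepted, and `tool_choice` takes either an enum string such as "auto" or a `ChatCompletionInputToolChoiceClass` instance:

```py
import asyncio
from huggingface_hub import AsyncInferenceClient

# Hypothetical OpenAI-style function definition.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

async def main() -> None:
    client = AsyncInferenceClient()
    response = await client.chat_completion(
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=TOOLS,
        tool_choice="auto",  # a ChatCompletionInputToolChoiceEnum value
    )
    print(response.choices[0].message)

asyncio.run(main())
```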
@@ -981,28 +991,25 @@ class AsyncInferenceClient:
                 a deployed Inference Endpoint. If not provided, the default recommended document question answering model will be used.
                 Defaults to None.
             doc_stride (`int`, *optional*):
-                If the words in the document are too long to fit with the question for the model, it will
-
-                overlap.
+                If the words in the document are too long to fit with the question for the model, it will be split in
+                several chunks with some overlap. This argument controls the size of that overlap.
             handle_impossible_answer (`bool`, *optional*):
-                Whether to accept impossible as an answer
+                Whether to accept impossible as an answer
             lang (`str`, *optional*):
-                Language to use while running OCR.
+                Language to use while running OCR. Defaults to english.
             max_answer_len (`int`, *optional*):
-                The maximum length of predicted answers (e.g., only answers with a shorter length are
-                considered).
+                The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
             max_question_len (`int`, *optional*):
                 The maximum length of the question after tokenization. It will be truncated if needed.
             max_seq_len (`int`, *optional*):
-                The maximum length of the total sentence (context + question) in tokens of each chunk
-
-                overlap) if needed.
+                The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+                model. The context will be split in several chunks (using doc_stride as overlap) if needed.
             top_k (`int`, *optional*):
-                The number of answers to return (will be chosen by order of likelihood). Can return less
-
-            word_boxes (`List[Union[List[float], str
-                A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
-
+                The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
+                answers if there are not enough options available within the context.
+            word_boxes (`List[Union[List[float], str]]`, *optional*):
+                A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
+                step and use the provided bounding boxes instead.
         Returns:
             `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
 
@@ -1019,7 +1026,7 @@ class AsyncInferenceClient:
         >>> from huggingface_hub import AsyncInferenceClient
         >>> client = AsyncInferenceClient()
         >>> await client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
-        [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16
+        [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
         ```
         """
         inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
@@ -1121,11 +1128,10 @@ class AsyncInferenceClient:
             model (`str`, *optional*):
                 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`List[str
-                When passed, the model will limit the scores to the passed targets instead of looking up
-
-
-                slower).
+            targets (`List[str]`, *optional*):
+                When passed, the model will limit the scores to the passed targets instead of looking up in the whole
+                vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
+                resulting token will be used (with a warning, and that might be slower).
             top_k (`int`, *optional*):
                 When passed, overrides the number of predictions to return.
         Returns:
@@ -1160,7 +1166,7 @@ class AsyncInferenceClient:
         image: ContentT,
         *,
         model: Optional[str] = None,
-        function_to_apply: Optional[
+        function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
         top_k: Optional[int] = None,
     ) -> List[ImageClassificationOutputElement]:
         """
@@ -1172,8 +1178,8 @@ class AsyncInferenceClient:
             model (`str`, *optional*):
                 The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
                 deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
-            function_to_apply (`
-                The function to apply to the
+            function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
+                The function to apply to the model outputs in order to retrieve the scores.
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
         Returns:
@@ -1206,7 +1212,7 @@ class AsyncInferenceClient:
         model: Optional[str] = None,
         mask_threshold: Optional[float] = None,
         overlap_mask_area_threshold: Optional[float] = None,
-        subtask: Optional[
+        subtask: Optional["ImageSegmentationSubtask"] = None,
         threshold: Optional[float] = None,
     ) -> List[ImageSegmentationOutputElement]:
         """
@@ -1228,7 +1234,7 @@ class AsyncInferenceClient:
                 Threshold to use when turning the predicted masks into binary values.
             overlap_mask_area_threshold (`float`, *optional*):
                 Mask overlap threshold to eliminate small, disconnected segments.
-            subtask (`
+            subtask (`"ImageSegmentationSubtask"`, *optional*):
                 Segmentation task to be performed, depending on model capabilities.
             threshold (`float`, *optional*):
                 Probability threshold to filter out predicted masks.
@@ -1268,12 +1274,11 @@ class AsyncInferenceClient:
         image: ContentT,
         prompt: Optional[str] = None,
         *,
-        negative_prompt: Optional[str] = None,
-        height: Optional[int] = None,
-        width: Optional[int] = None,
+        negative_prompt: Optional[List[str]] = None,
         num_inference_steps: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         model: Optional[str] = None,
+        target_size: Optional[ImageToImageTargetSize] = None,
         **kwargs,
     ) -> "Image":
         """
@@ -1290,21 +1295,19 @@ class AsyncInferenceClient:
                 The input image for translation. It can be raw bytes, an image file, or a URL to an online image.
             prompt (`str`, *optional*):
                 The text prompt to guide the image generation.
-            negative_prompt (`str`, *optional*):
-
-            height (`int`, *optional*):
-                The height in pixels of the generated image.
-            width (`int`, *optional*):
-                The width in pixels of the generated image.
+            negative_prompt (`List[str]`, *optional*):
+                One or several prompt to guide what NOT to include in image generation.
             num_inference_steps (`int`, *optional*):
-                The number of denoising steps. More denoising steps usually lead to a higher
-                expense of slower inference.
+                For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+                quality image at the expense of slower inference.
             guidance_scale (`float`, *optional*):
-
-
+                For diffusion models. A higher guidance scale value encourages the model to generate images closely
+                linked to the text prompt at the expense of lower image quality.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+            target_size (`ImageToImageTargetSize`, *optional*):
+                The size in pixel of the output image.
 
         Returns:
             `Image`: The translated image.
@@ -1327,8 +1330,7 @@ class AsyncInferenceClient:
         parameters = {
             "prompt": prompt,
             "negative_prompt": negative_prompt,
-            "height": height,
-            "width": width,
+            "target_size": target_size,
             "num_inference_steps": num_inference_steps,
             "guidance_scale": guidance_scale,
             **kwargs,
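With `height`/`width` folded into `target_size`, an image-to-image call after this change would look roughly like the sketch below. The input file and prompts are placeholders; `ImageToImageTargetSize` is the generated type added to the imports above:

```py
import asyncio
from huggingface_hub import AsyncInferenceClient
from huggingface_hub.inference._generated.types import ImageToImageTargetSize

async def main() -> None:
    client = AsyncInferenceClient()
    image = await client.image_to_image(
        "cat.png",  # placeholder input image
        prompt="Turn the cat into a tiger.",
        # Replaces the removed top-level height=/width= arguments.
        target_size=ImageToImageTargetSize(height=512, width=512),
        negative_prompt=["blurry"],  # now a list of strings
    )
    image.save("tiger.png")

asyncio.run(main())
```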
@@ -1537,26 +1539,24 @@ class AsyncInferenceClient:
                 The model to use for the question answering task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint.
             align_to_words (`bool`, *optional*):
-                Attempts to align the answer to real words. Improves quality on space separated
-
+                Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt
+                on non-space-separated languages (like Japanese or Chinese)
             doc_stride (`int`, *optional*):
-                If the context is too long to fit with the question for the model, it will be split in
-
+                If the context is too long to fit with the question for the model, it will be split in several chunks
+                with some overlap. This argument controls the size of that overlap.
             handle_impossible_answer (`bool`, *optional*):
                 Whether to accept impossible as an answer.
             max_answer_len (`int`, *optional*):
-                The maximum length of predicted answers (e.g., only answers with a shorter length are
-                considered).
+                The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
             max_question_len (`int`, *optional*):
                 The maximum length of the question after tokenization. It will be truncated if needed.
             max_seq_len (`int`, *optional*):
-                The maximum length of the total sentence (context + question) in tokens of each chunk
-
-                overlap) if needed.
+                The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+                model. The context will be split in several chunks (using docStride as overlap) if needed.
             top_k (`int`, *optional*):
-                The number of answers to return (will be chosen by order of likelihood). Note that we
-
-
+                The number of answers to return (will be chosen by order of likelihood). Note that we return less than
+                topk answers if there are not enough options available within the context.
+
         Returns:
             Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
             When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
@@ -1660,7 +1660,7 @@ class AsyncInferenceClient:
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         generate_parameters: Optional[Dict[str, Any]] = None,
-        truncation: Optional[
+        truncation: Optional["SummarizationTruncationStrategy"] = None,
     ) -> SummarizationOutput:
         """
         Generate a summary of a given text using a specified model.
@@ -1678,7 +1678,7 @@ class AsyncInferenceClient:
                 Whether to clean up the potential extra spaces in the text output.
             generate_parameters (`Dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
-            truncation (`
+            truncation (`"SummarizationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
         Returns:
             [`SummarizationOutput`]: The generated summary text.
@@ -1714,7 +1714,9 @@ class AsyncInferenceClient:
         query: str,
         *,
         model: Optional[str] = None,
-
+        padding: Optional["Padding"] = None,
+        sequential: Optional[bool] = None,
+        truncation: Optional[bool] = None,
     ) -> TableQuestionAnsweringOutputElement:
         """
         Retrieve the answer to a question from information given in a table.
@@ -1728,8 +1730,14 @@ class AsyncInferenceClient:
             model (`str`):
                 The model to use for the table-question-answering task. Can be a model ID hosted on the Hugging Face
                 Hub or a URL to a deployed Inference Endpoint.
-
-
+            padding (`"Padding"`, *optional*):
+                Activates and controls padding.
+            sequential (`bool`, *optional*):
+                Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
+                inference to be done sequentially to extract relations within sequences, given their conversational
+                nature.
+            truncation (`bool`, *optional*):
+                Activates and controls truncation.
 
         Returns:
             [`TableQuestionAnsweringOutputElement`]: a table question answering output containing the answer, coordinates, cells and the aggregator used.
@@ -1751,6 +1759,11 @@ class AsyncInferenceClient:
         TableQuestionAnsweringOutputElement(answer='36542', coordinates=[[0, 1]], cells=['36542'], aggregator='AVERAGE')
         ```
         """
+        parameters = {
+            "padding": padding,
+            "sequential": sequential,
+            "truncation": truncation,
+        }
         inputs = {
             "query": query,
             "table": table,
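A sketch of the three new knobs in use; the table contents are illustrative. Assuming `Padding` follows the usual tokenizer options ("do_not_pad", "longest", "max_length"), and noting that `sequential=True` matters for conversational models like SQA:

```py
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()
    table = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
    }
    answer = await client.table_question_answering(
        table,
        query="How many stars does the transformers repository have?",
        sequential=True,       # SQA-style models need sequential inference
        padding="max_length",  # a `Padding` enum value
        truncation=True,
    )
    print(answer.cells)

asyncio.run(main())
```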
@@ -1875,7 +1888,7 @@ class AsyncInferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
             function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
-                The function to apply to the
+                The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
             `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2136,7 +2149,7 @@ class AsyncInferenceClient:
             grammar ([`TextGenerationInputGrammarType`], *optional*):
                 Grammar constraints. Can be either a JSONSchema or a regex.
             max_new_tokens (`int`, *optional*):
-                Maximum number of generated tokens
+                Maximum number of generated tokens. Defaults to 100.
             repetition_penalty (`float`, *optional*):
                 The parameter for repetition penalty. 1.0 means no penalty. See [this
                 paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
@@ -2411,10 +2424,10 @@ class AsyncInferenceClient:
         self,
         prompt: str,
         *,
-        negative_prompt: Optional[str] = None,
+        negative_prompt: Optional[List[str]] = None,
         height: Optional[float] = None,
         width: Optional[float] = None,
-        num_inference_steps: Optional[
+        num_inference_steps: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
@@ -2434,8 +2447,8 @@ class AsyncInferenceClient:
         Args:
             prompt (`str`):
                 The prompt to generate an image from.
-            negative_prompt (`str`, *optional*):
-
+            negative_prompt (`List[str]`, *optional*):
+                One or several prompt to guide what NOT to include in image generation.
             height (`float`, *optional*):
                 The height in pixels of the image to generate.
             width (`float`, *optional*):
@@ -2444,8 +2457,8 @@ class AsyncInferenceClient:
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, *optional*):
-
-
+                A higher guidance scale value encourages the model to generate images closely linked to the text
+                prompt, but values too high may cause saturation and other artifacts.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended text-to-image model will be used.
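Since `negative_prompt` is now a list of strings rather than a single string, a text-to-image call after this change would look like the sketch below (prompts are illustrative):

```py
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()
    image = await client.text_to_image(
        "An astronaut riding a horse on the moon.",
        negative_prompt=["blurry", "low resolution"],  # was a single str before
        num_inference_steps=25,
    )
    image.save("astronaut.png")

asyncio.run(main())
```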
@@ -2533,44 +2546,42 @@ class AsyncInferenceClient:
                 Defaults to None.
             do_sample (`bool`, *optional*):
                 Whether to use sampling instead of greedy decoding when generating new tokens.
-            early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"`, *optional*):
+            early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"]`, *optional*):
                 Controls the stopping condition for beam-based methods.
             epsilon_cutoff (`float`, *optional*):
-                If set to float strictly between 0 and 1, only tokens with a conditional probability
-
-
-
+                If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
+                epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on
+                the size of the model. See [Truncation Sampling as Language Model
+                Desmoothing](https://hf.co/papers/2210.15191) for more details.
             eta_cutoff (`float`, *optional*):
-                Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
-
-
-
-
-
-                for more details.
+                Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly
+                between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff)
+                * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token
+                probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3,
+                depending on the size of the model. See [Truncation Sampling as Language Model
+                Desmoothing](https://hf.co/papers/2210.15191) for more details.
             max_length (`int`, *optional*):
                 The maximum length (in tokens) of the generated text, including the input.
             max_new_tokens (`int`, *optional*):
-                The maximum number of tokens to generate. Takes precedence over
+                The maximum number of tokens to generate. Takes precedence over max_length.
             min_length (`int`, *optional*):
                 The minimum length (in tokens) of the generated text, including the input.
             min_new_tokens (`int`, *optional*):
-                The minimum number of tokens to generate. Takes precedence over
+                The minimum number of tokens to generate. Takes precedence over min_length.
             num_beam_groups (`int`, *optional*):
-                Number of groups to divide num_beams into in order to ensure diversity among different
-
+                Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
+                See [this paper](https://hf.co/papers/1610.02424) for more details.
             num_beams (`int`, *optional*):
                 Number of beams to use for beam search.
             penalty_alpha (`float`, *optional*):
-                The value balances the model confidence and the degeneration penalty in contrastive
-                search decoding.
+                The value balances the model confidence and the degeneration penalty in contrastive search decoding.
             temperature (`float`, *optional*):
                 The value used to modulate the next token probabilities.
             top_k (`int`, *optional*):
                 The number of highest probability vocabulary tokens to keep for top-k-filtering.
             top_p (`float`, *optional*):
-                If set to float < 1, only the smallest set of most probable tokens with probabilities
-
+                If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
+                top_p or higher are kept for generation.
             typical_p (`float`, *optional*):
                 Local typicality measures how similar the conditional probability of predicting a target token next is
                 to the expected conditional probability of predicting a random token next, given the partial text
@@ -2627,7 +2638,7 @@ class AsyncInferenceClient:
         text: str,
         *,
         model: Optional[str] = None,
-        aggregation_strategy: Optional[
+        aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
         ignore_labels: Optional[List[str]] = None,
         stride: Optional[int] = None,
     ) -> List[TokenClassificationOutputElement]:
@@ -2642,10 +2653,10 @@ class AsyncInferenceClient:
                 The model to use for the token classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended token classification model will be used.
                 Defaults to None.
-            aggregation_strategy (`
-                The strategy used to fuse tokens based on model predictions
-            ignore_labels (`List[str
-                A list of labels to ignore
+            aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
+                The strategy used to fuse tokens based on model predictions
+            ignore_labels (`List[str]`, *optional*):
+                A list of labels to ignore
             stride (`int`, *optional*):
                 The number of overlapping tokens between chunks when splitting the input text.
 
@@ -2704,7 +2715,7 @@ class AsyncInferenceClient:
         src_lang: Optional[str] = None,
         tgt_lang: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
-        truncation: Optional[
+        truncation: Optional["TranslationTruncationStrategy"] = None,
         generate_parameters: Optional[Dict[str, Any]] = None,
     ) -> TranslationOutput:
         """
@@ -2728,7 +2739,7 @@ class AsyncInferenceClient:
                 Target language to translate to. Required for models that can translate to multiple languages.
             clean_up_tokenization_spaces (`bool`, *optional*):
                 Whether to clean up the potential extra spaces in the text output.
-            truncation (`
+            truncation (`"TranslationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
             generate_parameters (`Dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
@@ -2752,13 +2763,13 @@ class AsyncInferenceClient:
         >>> await client.translation("My name is Wolfgang and I live in Berlin")
         'Mein Name ist Wolfgang und ich lebe in Berlin.'
         >>> await client.translation("My name is Wolfgang and I live in Berlin", model="Helsinki-NLP/opus-mt-en-fr")
-        TranslationOutput(translation_text='Je m
+        TranslationOutput(translation_text='Je m'appelle Wolfgang et je vis à Berlin.')
         ```
 
         Specifying languages:
         ```py
         >>> client.translation("My name is Sarah Jessica Parker but you can call me Jessica", model="facebook/mbart-large-50-many-to-many-mmt", src_lang="en_XX", tgt_lang="fr_XX")
-        "Mon nom est Sarah Jessica Parker mais vous pouvez m
+        "Mon nom est Sarah Jessica Parker mais vous pouvez m'appeler Jessica"
         ```
         """
         # Throw error if only one of `src_lang` and `tgt_lang` was given
@@ -2799,9 +2810,8 @@ class AsyncInferenceClient:
                 a deployed Inference Endpoint. If not provided, the default recommended visual question answering model will be used.
                 Defaults to None.
             top_k (`int`, *optional*):
-                The number of answers to return (will be chosen by order of likelihood). Note that we
-
-                context.
+                The number of answers to return (will be chosen by order of likelihood). Note that we return less than
+                topk answers if there are not enough options available within the context.
         Returns:
             `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
 
@@ -2832,14 +2842,22 @@ class AsyncInferenceClient:
         response = await self.post(json=payload, model=model, task="visual-question-answering")
         return VisualQuestionAnsweringOutputElement.parse_obj_as_list(response)
 
+    @_deprecate_arguments(
+        version="0.30.0",
+        deprecated_args=["labels"],
+        custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+    )
     async def zero_shot_classification(
         self,
         text: str,
-
+        # temporarily keeping it optional for backward compatibility.
+        candidate_labels: List[str] = None,  # type: ignore
         *,
-        multi_label: bool = False,
+        multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
+        # deprecated argument
+        labels: List[str] = None,  # type: ignore
     ) -> List[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.
@@ -2847,20 +2865,22 @@ class AsyncInferenceClient:
         Args:
             text (`str`):
                 The input text to classify.
-
-
-
-
-
+            candidate_labels (`List[str]`):
+                The set of possible class labels to classify the text into.
+            labels (`List[str]`, *optional*):
+                (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
+            multi_label (`bool`, *optional*):
+                Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
+                the label likelihoods for each sequence is 1. If true, the labels are considered independent and
+                probabilities are normalized for each candidate.
             hypothesis_template (`str`, *optional*):
-
-
-                For example, with hypothesis_template="This text is about {}." and labels=["economics", "politics"], the system internally creates the two hypotheses "This text is about economics." and "This text is about politics.".
-                The model then evaluates for both hypotheses if they are entailed in the provided `text` or not.
+                The sentence used in conjunction with `candidate_labels` to attempt the text classification by
+                replacing the placeholder with the candidate labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
 
+
         Returns:
             `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
 
@@ -2918,9 +2938,17 @@ class AsyncInferenceClient:
         ]
         ```
         """
-
+        # handle deprecation
+        if labels is not None:
+            if candidate_labels is not None:
+                raise ValueError(
+                    "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+                )
+            candidate_labels = labels
+        elif candidate_labels is None:
+            raise ValueError("Must specify `candidate_labels`")
         parameters = {
-            "candidate_labels":
+            "candidate_labels": candidate_labels,
             "multi_label": multi_label,
             "hypothesis_template": hypothesis_template,
         }
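For callers, the shim above means `labels=` keeps working (with a deprecation warning) until 0.30.0, but `labels` and `candidate_labels` cannot be mixed. A migration sketch with illustrative inputs; the same rename applies to `zero_shot_image_classification` below:

```py
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()
    text = "The stock market dropped after the rate announcement."

    # Before (warns, removed in huggingface_hub>=0.30.0):
    # await client.zero_shot_classification(text, labels=["economics", "politics"])

    # After:
    results = await client.zero_shot_classification(
        text,
        candidate_labels=["economics", "politics", "sports"],
        multi_label=False,
    )
    print(results)

asyncio.run(main())
```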
@@ -2936,13 +2964,21 @@ class AsyncInferenceClient:
             for label, score in zip(output["labels"], output["scores"])
         ]
 
+    @_deprecate_arguments(
+        version="0.30.0",
+        deprecated_args=["labels"],
+        custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+    )
     async def zero_shot_image_classification(
         self,
         image: ContentT,
-
+        # temporarily keeping it optional for backward compatibility.
+        candidate_labels: List[str] = None,  # type: ignore
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
+        # deprecated argument
+        labels: List[str] = None,  # type: ignore
     ) -> List[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.
@@ -2950,14 +2986,17 @@ class AsyncInferenceClient:
         Args:
             image (`Union[str, Path, bytes, BinaryIO]`):
                 The input image to caption. It can be raw bytes, an image file, or a URL to an online image.
-
-
+            candidate_labels (`List[str]`):
+                The candidate labels for this image
+            labels (`List[str]`, *optional*):
+                (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
             hypothesis_template (`str`, *optional*):
-                The sentence used in conjunction with `
-                placeholder with the candidate labels.
+                The sentence used in conjunction with `candidate_labels` to attempt the image classification by
+                replacing the placeholder with the candidate labels.
+
         Returns:
             `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
 
@@ -2980,13 +3019,23 @@ class AsyncInferenceClient:
         [ZeroShotImageClassificationOutputElement(label='dog', score=0.956),...]
         ```
         """
+        # handle deprecation
+        if labels is not None:
+            if candidate_labels is not None:
+                raise ValueError(
+                    "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+                )
+            candidate_labels = labels
+        elif candidate_labels is None:
+            raise ValueError("Must specify `candidate_labels`")
         # Raise ValueError if input is less than 2 labels
-        if len(
+        if len(candidate_labels) < 2:
             raise ValueError("You must specify at least 2 classes to compare.")
-
-
-
-
+        parameters = {
+            "candidate_labels": candidate_labels,
+            "hypothesis_template": hypothesis_template,
+        }
+        payload = _prepare_payload(image, parameters=parameters, expect_binary=True)
         response = await self.post(
             **payload,
             model=model,