huggingface-hub 0.26.3__py3-none-any.whl → 0.27.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +49 -23
- huggingface_hub/_commit_scheduler.py +30 -4
- huggingface_hub/_local_folder.py +0 -4
- huggingface_hub/_login.py +38 -54
- huggingface_hub/_snapshot_download.py +6 -3
- huggingface_hub/_tensorboard_logger.py +2 -3
- huggingface_hub/_upload_large_folder.py +1 -1
- huggingface_hub/errors.py +19 -0
- huggingface_hub/fastai_utils.py +3 -2
- huggingface_hub/file_download.py +10 -12
- huggingface_hub/hf_api.py +102 -498
- huggingface_hub/hf_file_system.py +274 -35
- huggingface_hub/hub_mixin.py +5 -25
- huggingface_hub/inference/_client.py +185 -136
- huggingface_hub/inference/_common.py +2 -2
- huggingface_hub/inference/_generated/_async_client.py +186 -137
- huggingface_hub/inference/_generated/types/__init__.py +31 -10
- huggingface_hub/inference/_generated/types/audio_classification.py +3 -5
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +4 -8
- huggingface_hub/inference/_generated/types/chat_completion.py +8 -5
- huggingface_hub/inference/_generated/types/depth_estimation.py +1 -1
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -6
- huggingface_hub/inference/_generated/types/feature_extraction.py +1 -1
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -4
- huggingface_hub/inference/_generated/types/image_classification.py +3 -5
- huggingface_hub/inference/_generated/types/image_segmentation.py +2 -4
- huggingface_hub/inference/_generated/types/image_to_image.py +2 -4
- huggingface_hub/inference/_generated/types/image_to_text.py +4 -8
- huggingface_hub/inference/_generated/types/object_detection.py +2 -4
- huggingface_hub/inference/_generated/types/question_answering.py +2 -4
- huggingface_hub/inference/_generated/types/sentence_similarity.py +1 -1
- huggingface_hub/inference/_generated/types/summarization.py +2 -4
- huggingface_hub/inference/_generated/types/table_question_answering.py +21 -3
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -4
- huggingface_hub/inference/_generated/types/text_classification.py +4 -10
- huggingface_hub/inference/_generated/types/text_to_audio.py +6 -10
- huggingface_hub/inference/_generated/types/text_to_image.py +2 -4
- huggingface_hub/inference/_generated/types/text_to_speech.py +6 -10
- huggingface_hub/inference/_generated/types/token_classification.py +11 -12
- huggingface_hub/inference/_generated/types/translation.py +2 -4
- huggingface_hub/inference/_generated/types/video_classification.py +3 -4
- huggingface_hub/inference/_generated/types/visual_question_answering.py +2 -5
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +8 -18
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +9 -19
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +7 -9
- huggingface_hub/keras_mixin.py +3 -2
- huggingface_hub/lfs.py +2 -5
- huggingface_hub/repocard_data.py +4 -4
- huggingface_hub/serialization/__init__.py +2 -0
- huggingface_hub/serialization/_dduf.py +387 -0
- huggingface_hub/serialization/_torch.py +407 -25
- huggingface_hub/utils/_cache_manager.py +1 -1
- huggingface_hub/utils/_headers.py +9 -25
- huggingface_hub/utils/tqdm.py +15 -0
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc0.dist-info}/METADATA +8 -3
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc0.dist-info}/RECORD +60 -60
- huggingface_hub/_multi_commits.py +0 -306
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc0.dist-info}/top_level.txt +0 -0
--- huggingface_hub/inference/_client.py (0.26.3)
+++ huggingface_hub/inference/_client.py (0.27.0rc0)
@@ -70,17 +70,24 @@ from huggingface_hub.inference._generated.types import (
     AutomaticSpeechRecognitionOutput,
     ChatCompletionInputGrammarType,
     ChatCompletionInputStreamOptions,
-    ChatCompletionInputToolTypeClass,
+    ChatCompletionInputTool,
+    ChatCompletionInputToolChoiceClass,
+    ChatCompletionInputToolChoiceEnum,
     ChatCompletionOutput,
     ChatCompletionStreamOutput,
     DocumentQuestionAnsweringOutputElement,
     FillMaskOutputElement,
     ImageClassificationOutputElement,
+    ImageClassificationOutputTransform,
     ImageSegmentationOutputElement,
+    ImageSegmentationSubtask,
+    ImageToImageTargetSize,
     ImageToTextOutput,
     ObjectDetectionOutputElement,
+    Padding,
     QuestionAnsweringOutputElement,
     SummarizationOutput,
+    SummarizationTruncationStrategy,
     TableQuestionAnsweringOutputElement,
     TextClassificationOutputElement,
     TextClassificationOutputTransform,
@@ -89,9 +96,10 @@ from huggingface_hub.inference._generated.types import (
     TextGenerationStreamOutput,
     TextToImageTargetSize,
     TextToSpeechEarlyStoppingEnum,
+    TokenClassificationAggregationStrategy,
     TokenClassificationOutputElement,
-    ToolElement,
     TranslationOutput,
+    TranslationTruncationStrategy,
     VisualQuestionAnsweringOutputElement,
     ZeroShotClassificationOutputElement,
     ZeroShotImageClassificationOutputElement,
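In short, this release renames the chat-completion tool types. A hypothetical migration sketch for code that imported the old names, using the import path taken verbatim from the hunks above (whether these types are also re-exported from the top-level `huggingface_hub` namespace is not shown in this excerpt):

```py
# 0.26.x names -> 0.27.0rc0 names, as implied by the import hunks above:
#   ChatCompletionInputToolTypeClass -> ChatCompletionInputToolChoiceClass
#   ToolElement                      -> ChatCompletionInputTool
from huggingface_hub.inference._generated.types import (
    ChatCompletionInputTool,
    ChatCompletionInputToolChoiceClass,
    ChatCompletionInputToolChoiceEnum,
)
```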
@@ -178,7 +186,9 @@ class InferenceClient:
 
         self.model: Optional[str] = model
         self.token: Union[str, bool, None] = token if token is not None else api_key
-        self.headers = CaseInsensitiveDict(build_hf_headers(token=self.token))  # 'authorization' + 'user-agent'
+        self.headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
+            build_hf_headers(token=self.token)  # 'authorization' + 'user-agent'
+        )
         if headers is not None:
             self.headers.update(headers)
         self.cookies = cookies
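The constructor behavior is unchanged in substance: default authorization and user-agent headers are built first, then any user-supplied headers are merged on top. A minimal sketch (token, header, and model values are placeholders):

```py
from huggingface_hub import InferenceClient

# User-supplied headers override the generated 'authorization' +
# 'user-agent' defaults, matched case-insensitively.
client = InferenceClient(
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model ID
    token="hf_xxx",                            # placeholder token
    headers={"X-My-Header": "value"},          # extra header, kept as-is
)
```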
@@ -341,7 +351,7 @@ class InferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
             function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
-                The function to apply to the
+                The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
             `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -466,9 +476,9 @@ class InferenceClient:
         stop: Optional[List[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
-        tool_choice: Optional[Union[ChatCompletionInputToolTypeClass, "ChatCompletionInputToolTypeEnum"]] = None,
+        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ToolElement]] = None,
+        tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
     ) -> ChatCompletionOutput: ...
@@ -491,9 +501,9 @@ class InferenceClient:
         stop: Optional[List[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
-        tool_choice: Optional[Union[ChatCompletionInputToolTypeClass, "ChatCompletionInputToolTypeEnum"]] = None,
+        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ToolElement]] = None,
+        tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
     ) -> Iterable[ChatCompletionStreamOutput]: ...
@@ -516,9 +526,9 @@ class InferenceClient:
         stop: Optional[List[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
-        tool_choice: Optional[Union[ChatCompletionInputToolTypeClass, "ChatCompletionInputToolTypeEnum"]] = None,
+        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ToolElement]] = None,
+        tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...
@@ -541,9 +551,9 @@ class InferenceClient:
         stop: Optional[List[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
-        tool_choice: Optional[Union[ChatCompletionInputToolTypeClass, "ChatCompletionInputToolTypeEnum"]] = None,
+        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ToolElement]] = None,
+        tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
@@ -582,7 +592,7 @@ class InferenceClient:
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
                 probabilities of each output token returned in the content of message.
             max_tokens (`int`, *optional*):
-                Maximum number of tokens allowed in the response. Defaults to
+                Maximum number of tokens allowed in the response. Defaults to 100.
             n (`int`, *optional*):
                 UNUSED.
             presence_penalty (`float`, *optional*):
@@ -609,11 +619,11 @@ class InferenceClient:
             top_p (`float`, *optional*):
                 Fraction of the most likely next words to sample from.
                 Must be between 0 and 1. Defaults to 1.0.
-            tool_choice ([`ChatCompletionInputToolTypeClass`] or [`ChatCompletionInputToolTypeEnum`], *optional*):
+            tool_choice ([`ChatCompletionInputToolChoiceClass`] or [`ChatCompletionInputToolChoiceEnum`], *optional*):
                 The tool to use for the completion. Defaults to "auto".
             tool_prompt (`str`, *optional*):
                 A prompt to be appended before the tools.
-            tools (List of [`ToolElement`], *optional*):
+            tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
 
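The renamed types only tighten the annotations; call sites can keep passing plain dicts in the OpenAI-compatible shape. A hedged sketch (the model ID and tool schema are illustrative, and dict inputs are assumed to remain accepted as in 0.26.x):

```py
from huggingface_hub import InferenceClient

client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct")  # illustrative model
response = client.chat_completion(
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "get_weather",  # hypothetical tool
                "description": "Get the current weather for a city.",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
    tool_choice="auto",  # or a ChatCompletionInputToolChoiceClass instance
    max_tokens=100,      # defaults to 100 per the docstring above
)
print(response.choices[0].message)
```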
@@ -939,28 +949,25 @@ class InferenceClient:
                 a deployed Inference Endpoint. If not provided, the default recommended document question answering model will be used.
                 Defaults to None.
             doc_stride (`int`, *optional*):
-                If the words in the document are too long to fit with the question for the model, it will
-                be split in several chunks with some overlap. This argument controls the size of that
-                overlap.
+                If the words in the document are too long to fit with the question for the model, it will be split in
+                several chunks with some overlap. This argument controls the size of that overlap.
             handle_impossible_answer (`bool`, *optional*):
-                Whether to accept impossible as an answer
+                Whether to accept impossible as an answer
             lang (`str`, *optional*):
-                Language to use while running OCR.
+                Language to use while running OCR. Defaults to english.
             max_answer_len (`int`, *optional*):
-                The maximum length of predicted answers (e.g., only answers with a shorter length are
-                considered).
+                The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
             max_question_len (`int`, *optional*):
                 The maximum length of the question after tokenization. It will be truncated if needed.
             max_seq_len (`int`, *optional*):
-                The maximum length of the total sentence (context + question) in tokens of each chunk
-                passed to the model. The context will be split in several chunks (using doc_stride as
-                overlap) if needed.
+                The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+                model. The context will be split in several chunks (using doc_stride as overlap) if needed.
             top_k (`int`, *optional*):
-                The number of answers to return (will be chosen by order of likelihood). Can return less
-                than top_k answers if there are not enough options available within the context.
-            word_boxes (`List[Union[List[float], str]]`, *optional*):
-                A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
-                skip the OCR step and use the provided bounding boxes instead.
+                The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
+                answers if there are not enough options available within the context.
+            word_boxes (`List[Union[List[float], str`, *optional*):
+                A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
+                step and use the provided bounding boxes instead.
         Returns:
             `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
 
@@ -976,7 +983,7 @@ class InferenceClient:
         >>> from huggingface_hub import InferenceClient
         >>> client = InferenceClient()
         >>> client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
-        [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16
+        [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
         ```
         """
         inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
@@ -1077,11 +1084,10 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`List[str]`, *optional*):
-                When passed, the model will limit the scores to the passed targets instead of looking up
-                in the whole vocabulary. If the provided targets are not in the model vocab, they will be
-                tokenized and the first resulting token will be used (with a warning, and that might be
-                slower).
+            targets (`List[str`, *optional*):
+                When passed, the model will limit the scores to the passed targets instead of looking up in the whole
+                vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
+                resulting token will be used (with a warning, and that might be slower).
             top_k (`int`, *optional*):
                 When passed, overrides the number of predictions to return.
         Returns:
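A hedged usage sketch of the `targets` parameter documented above (the model ID is illustrative, and the `[MASK]` placeholder must match the chosen model's mask token):

```py
from huggingface_hub import InferenceClient

client = InferenceClient("google-bert/bert-base-uncased")  # illustrative model
# Restrict scoring to a fixed candidate set instead of the full vocabulary.
results = client.fill_mask(
    "Paris is the [MASK] of France.",
    targets=["capital", "heart"],
    top_k=2,
)
for r in results:
    print(r.token_str, r.score)
```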
@@ -1115,7 +1121,7 @@ class InferenceClient:
         image: ContentT,
         *,
         model: Optional[str] = None,
-        function_to_apply: Optional[
+        function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
         top_k: Optional[int] = None,
     ) -> List[ImageClassificationOutputElement]:
         """
@@ -1127,8 +1133,8 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
                 deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
-            function_to_apply (`
-                The function to apply to the
+            function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
+                The function to apply to the model outputs in order to retrieve the scores.
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
         Returns:
@@ -1160,7 +1166,7 @@ class InferenceClient:
         model: Optional[str] = None,
         mask_threshold: Optional[float] = None,
         overlap_mask_area_threshold: Optional[float] = None,
-        subtask: Optional[
+        subtask: Optional["ImageSegmentationSubtask"] = None,
         threshold: Optional[float] = None,
     ) -> List[ImageSegmentationOutputElement]:
         """
@@ -1182,7 +1188,7 @@ class InferenceClient:
                 Threshold to use when turning the predicted masks into binary values.
             overlap_mask_area_threshold (`float`, *optional*):
                 Mask overlap threshold to eliminate small, disconnected segments.
-            subtask (`
+            subtask (`"ImageSegmentationSubtask"`, *optional*):
                 Segmentation task to be performed, depending on model capabilities.
             threshold (`float`, *optional*):
                 Probability threshold to filter out predicted masks.
@@ -1221,12 +1227,11 @@ class InferenceClient:
         image: ContentT,
         prompt: Optional[str] = None,
         *,
-        negative_prompt: Optional[str] = None,
-        height: Optional[int] = None,
-        width: Optional[int] = None,
+        negative_prompt: Optional[List[str]] = None,
         num_inference_steps: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         model: Optional[str] = None,
+        target_size: Optional[ImageToImageTargetSize] = None,
         **kwargs,
     ) -> "Image":
         """
@@ -1243,21 +1248,19 @@ class InferenceClient:
                 The input image for translation. It can be raw bytes, an image file, or a URL to an online image.
             prompt (`str`, *optional*):
                 The text prompt to guide the image generation.
-            negative_prompt (`str`, *optional*):
-
-            height (`int`, *optional*):
-                The height in pixels of the generated image.
-            width (`int`, *optional*):
-                The width in pixels of the generated image.
+            negative_prompt (`List[str]`, *optional*):
+                One or several prompt to guide what NOT to include in image generation.
             num_inference_steps (`int`, *optional*):
-                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
-                expense of slower inference.
+                For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+                quality image at the expense of slower inference.
             guidance_scale (`float`, *optional*):
-
-
+                For diffusion models. A higher guidance scale value encourages the model to generate images closely
+                linked to the text prompt at the expense of lower image quality.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+            target_size (`ImageToImageTargetSize`, *optional*):
+                The size in pixel of the output image.
 
         Returns:
             `Image`: The translated image.
@@ -1279,8 +1282,7 @@ class InferenceClient:
         parameters = {
             "prompt": prompt,
             "negative_prompt": negative_prompt,
-            "height": height,
-            "width": width,
+            "target_size": target_size,
             "num_inference_steps": num_inference_steps,
             "guidance_scale": guidance_scale,
             **kwargs,
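Taken together, the three hunks above replace the `height`/`width` pair with a single `target_size` parameter and turn `negative_prompt` into a list. A hedged migration sketch (the model ID and file names are illustrative, and passing `target_size` as a plain dict is assumed to serialize like an `ImageToImageTargetSize` instance):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
# 0.26.x:  client.image_to_image(img, prompt=..., height=512, width=512, negative_prompt="blurry")
# 0.27.0rc0:
image = client.image_to_image(
    "cat.png",                                  # placeholder input image
    prompt="a cat wearing a wizard hat",
    negative_prompt=["blurry", "low quality"],  # now a list of prompts
    target_size={"width": 512, "height": 512},  # replaces height=/width=
    model="timbrooks/instruct-pix2pix",         # illustrative model
)
image.save("wizard_cat.png")
```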
@@ -1481,26 +1483,24 @@ class InferenceClient:
                 The model to use for the question answering task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint.
             align_to_words (`bool`, *optional*):
-                Attempts to align the answer to real words. Improves quality on space separated
-                languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
+                Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt
+                on non-space-separated languages (like Japanese or Chinese)
             doc_stride (`int`, *optional*):
-                If the context is too long to fit with the question for the model, it will be split in
-                several chunks with some overlap. This argument controls the size of that overlap.
+                If the context is too long to fit with the question for the model, it will be split in several chunks
+                with some overlap. This argument controls the size of that overlap.
             handle_impossible_answer (`bool`, *optional*):
                 Whether to accept impossible as an answer.
             max_answer_len (`int`, *optional*):
-                The maximum length of predicted answers (e.g., only answers with a shorter length are
-                considered).
+                The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
             max_question_len (`int`, *optional*):
                 The maximum length of the question after tokenization. It will be truncated if needed.
             max_seq_len (`int`, *optional*):
-                The maximum length of the total sentence (context + question) in tokens of each chunk
-                passed to the model. The context will be split in several chunks (using docStride as
-                overlap) if needed.
+                The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+                model. The context will be split in several chunks (using docStride as overlap) if needed.
             top_k (`int`, *optional*):
-                The number of answers to return (will be chosen by order of likelihood). Note that we
-                return less than topk answers if there are not enough options available within the
-                context.
+                The number of answers to return (will be chosen by order of likelihood). Note that we return less than
+                topk answers if there are not enough options available within the context.
+
         Returns:
             Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
             When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
@@ -1602,7 +1602,7 @@ class InferenceClient:
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         generate_parameters: Optional[Dict[str, Any]] = None,
-        truncation: Optional[
+        truncation: Optional["SummarizationTruncationStrategy"] = None,
     ) -> SummarizationOutput:
         """
         Generate a summary of a given text using a specified model.
@@ -1620,7 +1620,7 @@ class InferenceClient:
                 Whether to clean up the potential extra spaces in the text output.
             generate_parameters (`Dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
-            truncation (`
+            truncation (`"SummarizationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
         Returns:
             [`SummarizationOutput`]: The generated summary text.
@@ -1655,7 +1655,9 @@ class InferenceClient:
         query: str,
         *,
         model: Optional[str] = None,
-
+        padding: Optional["Padding"] = None,
+        sequential: Optional[bool] = None,
+        truncation: Optional[bool] = None,
     ) -> TableQuestionAnsweringOutputElement:
         """
         Retrieve the answer to a question from information given in a table.
@@ -1669,8 +1671,14 @@ class InferenceClient:
             model (`str`):
                 The model to use for the table-question-answering task. Can be a model ID hosted on the Hugging Face
                 Hub or a URL to a deployed Inference Endpoint.
-
-
+            padding (`"Padding"`, *optional*):
+                Activates and controls padding.
+            sequential (`bool`, *optional*):
+                Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
+                inference to be done sequentially to extract relations within sequences, given their conversational
+                nature.
+            truncation (`bool`, *optional*):
+                Activates and controls truncation.
 
         Returns:
             [`TableQuestionAnsweringOutputElement`]: a table question answering output containing the answer, coordinates, cells and the aggregator used.
@@ -1691,6 +1699,11 @@ class InferenceClient:
         TableQuestionAnsweringOutputElement(answer='36542', coordinates=[[0, 1]], cells=['36542'], aggregator='AVERAGE')
         ```
         """
+        parameters = {
+            "padding": padding,
+            "sequential": sequential,
+            "truncation": truncation,
+        }
         inputs = {
             "query": query,
             "table": table,
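A hedged sketch of the new tokenizer controls on `table_question_answering` (the table contents and model ID are illustrative):

```py
from huggingface_hub import InferenceClient

client = InferenceClient("google/tapas-base-finetuned-wtq")  # illustrative model
table = {
    "Repository": ["Transformers", "Datasets", "Tokenizers"],
    "Stars": ["36542", "4512", "3934"],
}
answer = client.table_question_answering(
    table=table,
    query="How many stars does the transformers repository have?",
    sequential=True,   # row-by-row inference, e.g. for SQA-style models
    truncation=True,   # let the tokenizer truncate oversized inputs
)
print(answer.answer, answer.aggregator)
```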
@@ -1813,7 +1826,7 @@ class InferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
             function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
-                The function to apply to the
+                The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
             `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2073,7 +2086,7 @@ class InferenceClient:
             grammar ([`TextGenerationInputGrammarType`], *optional*):
                 Grammar constraints. Can be either a JSONSchema or a regex.
             max_new_tokens (`int`, *optional*):
-                Maximum number of generated tokens
+                Maximum number of generated tokens. Defaults to 100.
             repetition_penalty (`float`, *optional*):
                 The parameter for repetition penalty. 1.0 means no penalty. See [this
                 paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
@@ -2347,10 +2360,10 @@ class InferenceClient:
         self,
         prompt: str,
         *,
-        negative_prompt: Optional[str] = None,
+        negative_prompt: Optional[List[str]] = None,
         height: Optional[float] = None,
         width: Optional[float] = None,
-        num_inference_steps: Optional[float] = None,
+        num_inference_steps: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
@@ -2370,8 +2383,8 @@ class InferenceClient:
         Args:
             prompt (`str`):
                 The prompt to generate an image from.
-            negative_prompt (`str`, *optional*):
-                An optional negative prompt for the image generation.
+            negative_prompt (`List[str`, *optional*):
+                One or several prompt to guide what NOT to include in image generation.
             height (`float`, *optional*):
                 The height in pixels of the image to generate.
             width (`float`, *optional*):
@@ -2380,8 +2393,8 @@ class InferenceClient:
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, *optional*):
-
-
+                A higher guidance scale value encourages the model to generate images closely linked to the text
+                prompt, but values too high may cause saturation and other artifacts.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended text-to-image model will be used.
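`text_to_image` now takes `negative_prompt` as a list as well, and `num_inference_steps` is typed as an int. A minimal sketch (the model ID is illustrative):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
image = client.text_to_image(
    "An astronaut riding a horse on the moon.",
    negative_prompt=["blurry", "extra limbs"],  # was a single str in 0.26.x
    num_inference_steps=25,                     # now typed as int
    guidance_scale=7.5,
    model="stabilityai/stable-diffusion-2-1",   # illustrative model
)
image.save("astronaut.png")
```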
@@ -2468,44 +2481,42 @@ class InferenceClient:
                 Defaults to None.
             do_sample (`bool`, *optional*):
                 Whether to use sampling instead of greedy decoding when generating new tokens.
-            early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"`, *optional*):
+            early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"]`, *optional*):
                 Controls the stopping condition for beam-based methods.
             epsilon_cutoff (`float`, *optional*):
-                If set to float strictly between 0 and 1, only tokens with a conditional probability
-                greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
-                3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
-                Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+                If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
+                epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on
+                the size of the model. See [Truncation Sampling as Language Model
+                Desmoothing](https://hf.co/papers/2210.15191) for more details.
             eta_cutoff (`float`, *optional*):
-                Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
-                float strictly between 0 and 1, a token is only considered if it is greater than either
-                eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
-                term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
-                the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
-                See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
-                for more details.
+                Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly
+                between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff)
+                * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token
+                probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3,
+                depending on the size of the model. See [Truncation Sampling as Language Model
+                Desmoothing](https://hf.co/papers/2210.15191) for more details.
             max_length (`int`, *optional*):
                 The maximum length (in tokens) of the generated text, including the input.
             max_new_tokens (`int`, *optional*):
-                The maximum number of tokens to generate. Takes precedence over
+                The maximum number of tokens to generate. Takes precedence over max_length.
             min_length (`int`, *optional*):
                 The minimum length (in tokens) of the generated text, including the input.
             min_new_tokens (`int`, *optional*):
-                The minimum number of tokens to generate. Takes precedence over
+                The minimum number of tokens to generate. Takes precedence over min_length.
             num_beam_groups (`int`, *optional*):
-                Number of groups to divide num_beams into in order to ensure diversity among different
-                groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+                Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
+                See [this paper](https://hf.co/papers/1610.02424) for more details.
             num_beams (`int`, *optional*):
                 Number of beams to use for beam search.
             penalty_alpha (`float`, *optional*):
-                The value balances the model confidence and the degeneration penalty in contrastive
-                search decoding.
+                The value balances the model confidence and the degeneration penalty in contrastive search decoding.
             temperature (`float`, *optional*):
                 The value used to modulate the next token probabilities.
             top_k (`int`, *optional*):
                 The number of highest probability vocabulary tokens to keep for top-k-filtering.
             top_p (`float`, *optional*):
-                If set to float < 1, only the smallest set of most probable tokens with probabilities
-                that add up to top_p or higher are kept for generation.
+                If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
+                top_p or higher are kept for generation.
             typical_p (`float`, *optional*):
                 Local typicality measures how similar the conditional probability of predicting a target token next is
                 to the expected conditional probability of predicting a random token next, given the partial text
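Most of this hunk is a docstring reflow, but it documents the transformers-style generation knobs that `text_to_speech` forwards. A hedged sketch (the model ID is illustrative and must be a TTS model served by the backend):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
audio = client.text_to_speech(
    "Hello world, this is a test.",
    model="suno/bark-small",  # illustrative TTS model
    do_sample=True,           # sample instead of greedy decoding
    temperature=0.7,          # soften the next-token distribution
    max_new_tokens=256,       # takes precedence over max_length
)
with open("speech.flac", "wb") as f:
    f.write(audio)            # raw audio bytes
```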
@@ -2561,7 +2572,7 @@ class InferenceClient:
         text: str,
         *,
         model: Optional[str] = None,
-        aggregation_strategy: Optional[
+        aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
         ignore_labels: Optional[List[str]] = None,
         stride: Optional[int] = None,
     ) -> List[TokenClassificationOutputElement]:
@@ -2576,10 +2587,10 @@ class InferenceClient:
                 The model to use for the token classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended token classification model will be used.
                 Defaults to None.
-            aggregation_strategy (`
-                The strategy used to fuse tokens based on model predictions
-            ignore_labels (`List[str
-                A list of labels to ignore
+            aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
+                The strategy used to fuse tokens based on model predictions
+            ignore_labels (`List[str`, *optional*):
+                A list of labels to ignore
             stride (`int`, *optional*):
                 The number of overlapping tokens between chunks when splitting the input text.
 
@@ -2637,7 +2648,7 @@ class InferenceClient:
         src_lang: Optional[str] = None,
         tgt_lang: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
-        truncation: Optional[
+        truncation: Optional["TranslationTruncationStrategy"] = None,
         generate_parameters: Optional[Dict[str, Any]] = None,
     ) -> TranslationOutput:
         """
@@ -2661,7 +2672,7 @@ class InferenceClient:
                 Target language to translate to. Required for models that can translate to multiple languages.
             clean_up_tokenization_spaces (`bool`, *optional*):
                 Whether to clean up the potential extra spaces in the text output.
-            truncation (`
+            truncation (`"TranslationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
             generate_parameters (`Dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
@@ -2684,13 +2695,13 @@ class InferenceClient:
         >>> client.translation("My name is Wolfgang and I live in Berlin")
         'Mein Name ist Wolfgang und ich lebe in Berlin.'
         >>> client.translation("My name is Wolfgang and I live in Berlin", model="Helsinki-NLP/opus-mt-en-fr")
-        TranslationOutput(translation_text='Je m
+        TranslationOutput(translation_text='Je m'appelle Wolfgang et je vis à Berlin.')
         ```
 
         Specifying languages:
         ```py
         >>> client.translation("My name is Sarah Jessica Parker but you can call me Jessica", model="facebook/mbart-large-50-many-to-many-mmt", src_lang="en_XX", tgt_lang="fr_XX")
-        "Mon nom est Sarah Jessica Parker mais vous pouvez m
+        "Mon nom est Sarah Jessica Parker mais vous pouvez m'appeler Jessica"
         ```
         """
         # Throw error if only one of `src_lang` and `tgt_lang` was given
@@ -2731,9 +2742,8 @@ class InferenceClient:
                 a deployed Inference Endpoint. If not provided, the default recommended visual question answering model will be used.
                 Defaults to None.
             top_k (`int`, *optional*):
-                The number of answers to return (will be chosen by order of likelihood). Note that we
-                return less than topk answers if there are not enough options available within the
-                context.
+                The number of answers to return (will be chosen by order of likelihood). Note that we return less than
+                topk answers if there are not enough options available within the context.
         Returns:
             `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
 
@@ -2763,14 +2773,22 @@ class InferenceClient:
         response = self.post(json=payload, model=model, task="visual-question-answering")
         return VisualQuestionAnsweringOutputElement.parse_obj_as_list(response)
 
+    @_deprecate_arguments(
+        version="0.30.0",
+        deprecated_args=["labels"],
+        custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+    )
     def zero_shot_classification(
         self,
         text: str,
-        labels: List[str],
+        # temporarily keeping it optional for backward compatibility.
+        candidate_labels: List[str] = None,  # type: ignore
         *,
-        multi_label: bool = False,
+        multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
+        # deprecated argument
+        labels: List[str] = None,  # type: ignore
     ) -> List[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.
@@ -2778,20 +2796,22 @@ class InferenceClient:
         Args:
             text (`str`):
                 The input text to classify.
-
-
-
-
-
+            candidate_labels (`List[str]`):
+                The set of possible class labels to classify the text into.
+            labels (`List[str]`, *optional*):
+                (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
+            multi_label (`bool`, *optional*):
+                Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
+                the label likelihoods for each sequence is 1. If true, the labels are considered independent and
+                probabilities are normalized for each candidate.
             hypothesis_template (`str`, *optional*):
-
-
-                For example, with hypothesis_template="This text is about {}." and labels=["economics", "politics"], the system internally creates the two hypotheses "This text is about economics." and "This text is about politics.".
-                The model then evaluates for both hypotheses if they are entailed in the provided `text` or not.
+                The sentence used in conjunction with `candidate_labels` to attempt the text classification by
+                replacing the placeholder with the candidate labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.
 
+
         Returns:
             `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -2847,9 +2867,17 @@ class InferenceClient:
         ]
         ```
         """
-
+        # handle deprecation
+        if labels is not None:
+            if candidate_labels is not None:
+                raise ValueError(
+                    "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+                )
+            candidate_labels = labels
+        elif candidate_labels is None:
+            raise ValueError("Must specify `candidate_labels`")
         parameters = {
-            "candidate_labels": labels,
+            "candidate_labels": candidate_labels,
             "multi_label": multi_label,
             "hypothesis_template": hypothesis_template,
         }
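The deprecation shim above keeps old call sites working until 0.30.0 while steering callers to the new keyword. A minimal before/after sketch:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
text = "A new model offers an explanation for how the Galilean satellites formed."

# 0.26.x style (still works, but warns until removal in 0.30.0):
# client.zero_shot_classification(text, labels=["space", "biology", "finance"])

# 0.27.0rc0 style:
results = client.zero_shot_classification(
    text,
    candidate_labels=["space", "biology", "finance"],
    multi_label=False,  # scores normalized to sum to 1 across labels
)
print(results[0].label, results[0].score)
```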
@@ -2865,13 +2893,21 @@ class InferenceClient:
             for label, score in zip(output["labels"], output["scores"])
         ]
 
+    @_deprecate_arguments(
+        version="0.30.0",
+        deprecated_args=["labels"],
+        custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+    )
     def zero_shot_image_classification(
         self,
         image: ContentT,
-        labels: List[str],
+        # temporarily keeping it optional for backward compatibility.
+        candidate_labels: List[str] = None,  # type: ignore
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
+        # deprecated argument
+        labels: List[str] = None,  # type: ignore
     ) -> List[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.
@@ -2879,14 +2915,17 @@ class InferenceClient:
         Args:
             image (`Union[str, Path, bytes, BinaryIO]`):
                 The input image to caption. It can be raw bytes, an image file, or a URL to an online image.
-
-
+            candidate_labels (`List[str]`):
+                The candidate labels for this image
+            labels (`List[str]`, *optional*):
+                (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
             hypothesis_template (`str`, *optional*):
-                The sentence used in conjunction with `labels` to attempt the image classification by replacing the
-                placeholder with the candidate labels.
+                The sentence used in conjunction with `candidate_labels` to attempt the image classification by
+                replacing the placeholder with the candidate labels.
+
         Returns:
             `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -2908,13 +2947,23 @@ class InferenceClient:
         [ZeroShotImageClassificationOutputElement(label='dog', score=0.956),...]
         ```
         """
+        # handle deprecation
+        if labels is not None:
+            if candidate_labels is not None:
+                raise ValueError(
+                    "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+                )
+            candidate_labels = labels
+        elif candidate_labels is None:
+            raise ValueError("Must specify `candidate_labels`")
         # Raise ValueError if input is less than 2 labels
-        if len(labels) < 2:
+        if len(candidate_labels) < 2:
             raise ValueError("You must specify at least 2 classes to compare.")
-
-
-
-
+        parameters = {
+            "candidate_labels": candidate_labels,
+            "hypothesis_template": hypothesis_template,
+        }
+        payload = _prepare_payload(image, parameters=parameters, expect_binary=True)
         response = self.post(
             **payload,
             model=model,