huggingface-hub 0.26.5__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of huggingface-hub might be problematic.

Files changed (61)
  1. huggingface_hub/__init__.py +49 -23
  2. huggingface_hub/_commit_scheduler.py +30 -4
  3. huggingface_hub/_local_folder.py +0 -4
  4. huggingface_hub/_login.py +38 -54
  5. huggingface_hub/_snapshot_download.py +6 -3
  6. huggingface_hub/_tensorboard_logger.py +2 -3
  7. huggingface_hub/_upload_large_folder.py +1 -1
  8. huggingface_hub/errors.py +19 -0
  9. huggingface_hub/fastai_utils.py +3 -2
  10. huggingface_hub/file_download.py +10 -12
  11. huggingface_hub/hf_api.py +102 -498
  12. huggingface_hub/hf_file_system.py +274 -35
  13. huggingface_hub/hub_mixin.py +5 -25
  14. huggingface_hub/inference/_client.py +185 -136
  15. huggingface_hub/inference/_common.py +2 -2
  16. huggingface_hub/inference/_generated/_async_client.py +186 -137
  17. huggingface_hub/inference/_generated/types/__init__.py +31 -10
  18. huggingface_hub/inference/_generated/types/audio_classification.py +3 -5
  19. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +6 -9
  20. huggingface_hub/inference/_generated/types/chat_completion.py +8 -5
  21. huggingface_hub/inference/_generated/types/depth_estimation.py +1 -1
  22. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -6
  23. huggingface_hub/inference/_generated/types/feature_extraction.py +1 -1
  24. huggingface_hub/inference/_generated/types/fill_mask.py +2 -4
  25. huggingface_hub/inference/_generated/types/image_classification.py +3 -5
  26. huggingface_hub/inference/_generated/types/image_segmentation.py +2 -4
  27. huggingface_hub/inference/_generated/types/image_to_image.py +2 -4
  28. huggingface_hub/inference/_generated/types/image_to_text.py +6 -9
  29. huggingface_hub/inference/_generated/types/object_detection.py +2 -4
  30. huggingface_hub/inference/_generated/types/question_answering.py +2 -4
  31. huggingface_hub/inference/_generated/types/sentence_similarity.py +1 -1
  32. huggingface_hub/inference/_generated/types/summarization.py +2 -4
  33. huggingface_hub/inference/_generated/types/table_question_answering.py +21 -3
  34. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -4
  35. huggingface_hub/inference/_generated/types/text_classification.py +4 -10
  36. huggingface_hub/inference/_generated/types/text_to_audio.py +7 -10
  37. huggingface_hub/inference/_generated/types/text_to_image.py +2 -4
  38. huggingface_hub/inference/_generated/types/text_to_speech.py +7 -10
  39. huggingface_hub/inference/_generated/types/token_classification.py +11 -12
  40. huggingface_hub/inference/_generated/types/translation.py +2 -4
  41. huggingface_hub/inference/_generated/types/video_classification.py +3 -4
  42. huggingface_hub/inference/_generated/types/visual_question_answering.py +2 -5
  43. huggingface_hub/inference/_generated/types/zero_shot_classification.py +8 -18
  44. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +9 -19
  45. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +7 -9
  46. huggingface_hub/keras_mixin.py +3 -2
  47. huggingface_hub/lfs.py +2 -5
  48. huggingface_hub/repocard_data.py +4 -4
  49. huggingface_hub/serialization/__init__.py +2 -0
  50. huggingface_hub/serialization/_dduf.py +387 -0
  51. huggingface_hub/serialization/_torch.py +372 -14
  52. huggingface_hub/utils/_cache_manager.py +1 -1
  53. huggingface_hub/utils/_headers.py +9 -25
  54. huggingface_hub/utils/tqdm.py +15 -0
  55. {huggingface_hub-0.26.5.dist-info → huggingface_hub-0.27.0.dist-info}/METADATA +8 -3
  56. {huggingface_hub-0.26.5.dist-info → huggingface_hub-0.27.0.dist-info}/RECORD +60 -60
  57. huggingface_hub/_multi_commits.py +0 -306
  58. {huggingface_hub-0.26.5.dist-info → huggingface_hub-0.27.0.dist-info}/LICENSE +0 -0
  59. {huggingface_hub-0.26.5.dist-info → huggingface_hub-0.27.0.dist-info}/WHEEL +0 -0
  60. {huggingface_hub-0.26.5.dist-info → huggingface_hub-0.27.0.dist-info}/entry_points.txt +0 -0
  61. {huggingface_hub-0.26.5.dist-info → huggingface_hub-0.27.0.dist-info}/top_level.txt +0 -0
@@ -56,17 +56,24 @@ from huggingface_hub.inference._generated.types import (
  AutomaticSpeechRecognitionOutput,
  ChatCompletionInputGrammarType,
  ChatCompletionInputStreamOptions,
- ChatCompletionInputToolType,
+ ChatCompletionInputTool,
+ ChatCompletionInputToolChoiceClass,
+ ChatCompletionInputToolChoiceEnum,
  ChatCompletionOutput,
  ChatCompletionStreamOutput,
  DocumentQuestionAnsweringOutputElement,
  FillMaskOutputElement,
  ImageClassificationOutputElement,
+ ImageClassificationOutputTransform,
  ImageSegmentationOutputElement,
+ ImageSegmentationSubtask,
+ ImageToImageTargetSize,
  ImageToTextOutput,
  ObjectDetectionOutputElement,
+ Padding,
  QuestionAnsweringOutputElement,
  SummarizationOutput,
+ SummarizationTruncationStrategy,
  TableQuestionAnsweringOutputElement,
  TextClassificationOutputElement,
  TextClassificationOutputTransform,
@@ -75,9 +82,10 @@ from huggingface_hub.inference._generated.types import (
  TextGenerationStreamOutput,
  TextToImageTargetSize,
  TextToSpeechEarlyStoppingEnum,
+ TokenClassificationAggregationStrategy,
  TokenClassificationOutputElement,
- ToolElement,
  TranslationOutput,
+ TranslationTruncationStrategy,
  VisualQuestionAnsweringOutputElement,
  ZeroShotClassificationOutputElement,
  ZeroShotImageClassificationOutputElement,
@@ -170,7 +178,9 @@ class AsyncInferenceClient:

  self.model: Optional[str] = model
  self.token: Union[str, bool, None] = token if token is not None else api_key
- self.headers = CaseInsensitiveDict(build_hf_headers(token=self.token)) # 'authorization' + 'user-agent'
+ self.headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
+ build_hf_headers(token=self.token) # 'authorization' + 'user-agent'
+ )
  if headers is not None:
  self.headers.update(headers)
  self.cookies = cookies
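The constructor now annotates `self.headers` as a `CaseInsensitiveDict[str]`. A minimal sketch of how extra headers merge into the defaults built from the token; the model id and header name below are placeholders, not taken from the diff:

```python
from huggingface_hub import AsyncInferenceClient

# Minimal sketch: custom headers are merged into the case-insensitive
# defaults built by build_hf_headers (authorization + user-agent).
client = AsyncInferenceClient(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    headers={"X-My-Header": "demo"},           # merged via self.headers.update(headers)
)
print(client.headers["x-my-header"])           # lookup is case-insensitive
```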
@@ -317,7 +327,7 @@ class AsyncInferenceClient:
  logger.info(f"Waiting for model to be loaded on the server: {error}")
  if "X-wait-for-model" not in headers and url.startswith(INFERENCE_ENDPOINT):
  headers["X-wait-for-model"] = "1"
- time.sleep(1)
+ await asyncio.sleep(1)
  if timeout is not None:
  timeout = max(self.timeout - (time.time() - t0), 1) # type: ignore
  continue
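The retry loop now uses `await asyncio.sleep(1)` instead of `time.sleep(1)`, so waiting for a model to load no longer blocks the event loop. A small self-contained illustration of the difference (not the client's code):

```python
import asyncio
import time

async def blocking_wait() -> str:
    time.sleep(1)           # blocks the whole event loop for one second
    return "done"

async def cooperative_wait() -> str:
    await asyncio.sleep(1)  # yields control so other coroutines keep running
    return "done"

async def main() -> None:
    # Two cooperative waits overlap and finish in ~1s;
    # two blocking waits would run back to back in ~2s.
    await asyncio.gather(cooperative_wait(), cooperative_wait())

asyncio.run(main())
```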
@@ -374,7 +384,7 @@ class AsyncInferenceClient:
  top_k (`int`, *optional*):
  When specified, limits the output to the top K most probable classes.
  function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
- The function to apply to the output.
+ The function to apply to the model outputs in order to retrieve the scores.

  Returns:
  `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -502,9 +512,9 @@ class AsyncInferenceClient:
  stop: Optional[List[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
- tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ToolElement]] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
  ) -> ChatCompletionOutput: ...
@@ -527,9 +537,9 @@ class AsyncInferenceClient:
  stop: Optional[List[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
- tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ToolElement]] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
  ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
@@ -552,9 +562,9 @@ class AsyncInferenceClient:
  stop: Optional[List[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
- tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ToolElement]] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
  ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
@@ -577,9 +587,9 @@ class AsyncInferenceClient:
  stop: Optional[List[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
- tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ToolElement]] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
  ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
@@ -618,7 +628,7 @@ class AsyncInferenceClient:
  Whether to return log probabilities of the output tokens or not. If true, returns the log
  probabilities of each output token returned in the content of message.
  max_tokens (`int`, *optional*):
- Maximum number of tokens allowed in the response. Defaults to 20.
+ Maximum number of tokens allowed in the response. Defaults to 100.
  n (`int`, *optional*):
  UNUSED.
  presence_penalty (`float`, *optional*):
@@ -645,11 +655,11 @@ class AsyncInferenceClient:
  top_p (`float`, *optional*):
  Fraction of the most likely next words to sample from.
  Must be between 0 and 1. Defaults to 1.0.
- tool_choice ([`ChatCompletionInputToolType`] or `str`, *optional*):
+ tool_choice ([`ChatCompletionInputToolChoiceClass`] or [`ChatCompletionInputToolChoiceEnum`], *optional*):
  The tool to use for the completion. Defaults to "auto".
  tool_prompt (`str`, *optional*):
  A prompt to be appended before the tools.
- tools (List of [`ToolElement`], *optional*):
+ tools (List of [`ChatCompletionInputTool`], *optional*):
  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
  provide a list of functions the model may generate JSON inputs for.

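`ToolElement`/`ChatCompletionInputToolType` are replaced by `ChatCompletionInputTool` and the two new tool-choice types, and the documented `max_tokens` default changes from 20 to 100. A hedged sketch of a tool call against the new signature; the model id and tool schema are placeholders, and the dict-shaped tool definition is assumed to be serialized as-is:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

get_weather = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

async def main() -> None:
    client = AsyncInferenceClient("meta-llama/Llama-3.1-8B-Instruct")  # placeholder model id
    response = await client.chat_completion(
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=[get_weather],   # typed as List[ChatCompletionInputTool] in 0.27.0
        tool_choice="auto",    # or a ChatCompletionInputToolChoiceClass instance
        max_tokens=100,        # matches the new documented default
    )
    print(response.choices[0].message.tool_calls)

asyncio.run(main())
```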
@@ -981,28 +991,25 @@ class AsyncInferenceClient:
  a deployed Inference Endpoint. If not provided, the default recommended document question answering model will be used.
  Defaults to None.
  doc_stride (`int`, *optional*):
- If the words in the document are too long to fit with the question for the model, it will
- be split in several chunks with some overlap. This argument controls the size of that
- overlap.
+ If the words in the document are too long to fit with the question for the model, it will be split in
+ several chunks with some overlap. This argument controls the size of that overlap.
  handle_impossible_answer (`bool`, *optional*):
- Whether to accept impossible as an answer.
+ Whether to accept impossible as an answer
  lang (`str`, *optional*):
- Language to use while running OCR.
+ Language to use while running OCR. Defaults to english.
  max_answer_len (`int`, *optional*):
- The maximum length of predicted answers (e.g., only answers with a shorter length are
- considered).
+ The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
  max_question_len (`int`, *optional*):
  The maximum length of the question after tokenization. It will be truncated if needed.
  max_seq_len (`int`, *optional*):
- The maximum length of the total sentence (context + question) in tokens of each chunk
- passed to the model. The context will be split in several chunks (using doc_stride as
- overlap) if needed.
+ The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+ model. The context will be split in several chunks (using doc_stride as overlap) if needed.
  top_k (`int`, *optional*):
- The number of answers to return (will be chosen by order of likelihood). Can return less
- than top_k answers if there are not enough options available within the context.
- word_boxes (`List[Union[List[float], str]]`, *optional*):
- A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
- skip the OCR step and use the provided bounding boxes instead.
+ The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
+ answers if there are not enough options available within the context.
+ word_boxes (`List[Union[List[float], str`, *optional*):
+ A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
+ step and use the provided bounding boxes instead.
  Returns:
  `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

@@ -1019,7 +1026,7 @@ class AsyncInferenceClient:
  >>> from huggingface_hub import AsyncInferenceClient
  >>> client = AsyncInferenceClient()
  >>> await client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
- [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16, words=None)]
+ [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
  ```
  """
  inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
@@ -1121,11 +1128,10 @@ class AsyncInferenceClient:
  model (`str`, *optional*):
  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
- targets (`List[str]`, *optional*):
- When passed, the model will limit the scores to the passed targets instead of looking up
- in the whole vocabulary. If the provided targets are not in the model vocab, they will be
- tokenized and the first resulting token will be used (with a warning, and that might be
- slower).
+ targets (`List[str`, *optional*):
+ When passed, the model will limit the scores to the passed targets instead of looking up in the whole
+ vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
+ resulting token will be used (with a warning, and that might be slower).
  top_k (`int`, *optional*):
  When passed, overrides the number of predictions to return.
  Returns:
@@ -1160,7 +1166,7 @@ class AsyncInferenceClient:
  image: ContentT,
  *,
  model: Optional[str] = None,
- function_to_apply: Optional[Literal["sigmoid", "softmax", "none"]] = None,
+ function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
  top_k: Optional[int] = None,
  ) -> List[ImageClassificationOutputElement]:
  """
@@ -1172,8 +1178,8 @@ class AsyncInferenceClient:
  model (`str`, *optional*):
  The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
  deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
- function_to_apply (`Literal["sigmoid", "softmax", "none"]`, *optional*):
- The function to apply to the output scores.
+ function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
+ The function to apply to the model outputs in order to retrieve the scores.
  top_k (`int`, *optional*):
  When specified, limits the output to the top K most probable classes.
  Returns:
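`function_to_apply` is now typed as the generated `ImageClassificationOutputTransform` rather than a bare `Literal`. A short sketch, assuming the transform is still passed as one of the strings "sigmoid"/"softmax"/"none" and using a placeholder image URL:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.image_classification(
        "https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg",  # placeholder URL
        function_to_apply="softmax",  # assumed ImageClassificationOutputTransform value
        top_k=3,
    )
    for element in results:
        print(element.label, element.score)

asyncio.run(main())
```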
@@ -1206,7 +1212,7 @@ class AsyncInferenceClient:
  model: Optional[str] = None,
  mask_threshold: Optional[float] = None,
  overlap_mask_area_threshold: Optional[float] = None,
- subtask: Optional[Literal["instance", "panoptic", "semantic"]] = None,
+ subtask: Optional["ImageSegmentationSubtask"] = None,
  threshold: Optional[float] = None,
  ) -> List[ImageSegmentationOutputElement]:
  """
@@ -1228,7 +1234,7 @@ class AsyncInferenceClient:
  Threshold to use when turning the predicted masks into binary values.
  overlap_mask_area_threshold (`float`, *optional*):
  Mask overlap threshold to eliminate small, disconnected segments.
- subtask (`Literal["instance", "panoptic", "semantic"]`, *optional*):
+ subtask (`"ImageSegmentationSubtask"`, *optional*):
  Segmentation task to be performed, depending on model capabilities.
  threshold (`float`, *optional*):
  Probability threshold to filter out predicted masks.
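`subtask` follows the same pattern, moving from a `Literal` to the generated `ImageSegmentationSubtask` type. A sketch assuming the subtask is still passed as a plain string; file and model names are placeholders:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()
    segments = await client.image_segmentation(
        "cityscape.jpg",                                    # placeholder local file
        subtask="semantic",                                 # assumed ImageSegmentationSubtask value
        model="nvidia/segformer-b0-finetuned-ade-512-512",  # placeholder model id
    )
    for segment in segments:
        print(segment.label, segment.score)

asyncio.run(main())
```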
@@ -1268,12 +1274,11 @@ class AsyncInferenceClient:
  image: ContentT,
  prompt: Optional[str] = None,
  *,
- negative_prompt: Optional[str] = None,
- height: Optional[int] = None,
- width: Optional[int] = None,
+ negative_prompt: Optional[List[str]] = None,
  num_inference_steps: Optional[int] = None,
  guidance_scale: Optional[float] = None,
  model: Optional[str] = None,
+ target_size: Optional[ImageToImageTargetSize] = None,
  **kwargs,
  ) -> "Image":
  """
@@ -1290,21 +1295,19 @@ class AsyncInferenceClient:
  The input image for translation. It can be raw bytes, an image file, or a URL to an online image.
  prompt (`str`, *optional*):
  The text prompt to guide the image generation.
- negative_prompt (`str`, *optional*):
- A negative prompt to guide the translation process.
- height (`int`, *optional*):
- The height in pixels of the generated image.
- width (`int`, *optional*):
- The width in pixels of the generated image.
+ negative_prompt (`List[str]`, *optional*):
+ One or several prompt to guide what NOT to include in image generation.
  num_inference_steps (`int`, *optional*):
- The number of denoising steps. More denoising steps usually lead to a higher quality image at the
- expense of slower inference.
+ For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+ quality image at the expense of slower inference.
  guidance_scale (`float`, *optional*):
- Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ For diffusion models. A higher guidance scale value encourages the model to generate images closely
+ linked to the text prompt at the expense of lower image quality.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+ target_size (`ImageToImageTargetSize`, *optional*):
+ The size in pixel of the output image.

  Returns:
  `Image`: The translated image.
@@ -1327,8 +1330,7 @@ class AsyncInferenceClient:
  parameters = {
  "prompt": prompt,
  "negative_prompt": negative_prompt,
- "height": height,
- "width": width,
+ "target_size": target_size,
  "num_inference_steps": num_inference_steps,
  "guidance_scale": guidance_scale,
  **kwargs,
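`image_to_image` drops `height`/`width` in favor of a single `target_size` and now takes `negative_prompt` as a list of strings. A sketch assuming `ImageToImageTargetSize` exposes `width`/`height` fields; the file names are placeholders:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient
from huggingface_hub.inference._generated.types import ImageToImageTargetSize

async def main() -> None:
    client = AsyncInferenceClient()
    image = await client.image_to_image(
        "cat.png",                                   # placeholder input file
        prompt="a cat wearing a spacesuit",
        negative_prompt=["blurry", "low quality"],   # now a list of strings
        target_size=ImageToImageTargetSize(width=512, height=512),  # assumed field names
        num_inference_steps=25,
        guidance_scale=7.5,
    )
    image.save("cat_spacesuit.png")

asyncio.run(main())
```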
@@ -1537,26 +1539,24 @@ class AsyncInferenceClient:
  The model to use for the question answering task. Can be a model ID hosted on the Hugging Face Hub or a URL to
  a deployed Inference Endpoint.
  align_to_words (`bool`, *optional*):
- Attempts to align the answer to real words. Improves quality on space separated
- languages. Might hurt on non-space-separated languages (like Japanese or Chinese).
+ Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt
+ on non-space-separated languages (like Japanese or Chinese)
  doc_stride (`int`, *optional*):
- If the context is too long to fit with the question for the model, it will be split in
- several chunks with some overlap. This argument controls the size of that overlap.
+ If the context is too long to fit with the question for the model, it will be split in several chunks
+ with some overlap. This argument controls the size of that overlap.
  handle_impossible_answer (`bool`, *optional*):
  Whether to accept impossible as an answer.
  max_answer_len (`int`, *optional*):
- The maximum length of predicted answers (e.g., only answers with a shorter length are
- considered).
+ The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
  max_question_len (`int`, *optional*):
  The maximum length of the question after tokenization. It will be truncated if needed.
  max_seq_len (`int`, *optional*):
- The maximum length of the total sentence (context + question) in tokens of each chunk
- passed to the model. The context will be split in several chunks (using docStride as
- overlap) if needed.
+ The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+ model. The context will be split in several chunks (using docStride as overlap) if needed.
  top_k (`int`, *optional*):
- The number of answers to return (will be chosen by order of likelihood). Note that we
- return less than topk answers if there are not enough options available within the
- context.
+ The number of answers to return (will be chosen by order of likelihood). Note that we return less than
+ topk answers if there are not enough options available within the context.
+
  Returns:
  Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
@@ -1660,7 +1660,7 @@ class AsyncInferenceClient:
  model: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  generate_parameters: Optional[Dict[str, Any]] = None,
- truncation: Optional[Literal["do_not_truncate", "longest_first", "only_first", "only_second"]] = None,
+ truncation: Optional["SummarizationTruncationStrategy"] = None,
  ) -> SummarizationOutput:
  """
  Generate a summary of a given text using a specified model.
@@ -1678,7 +1678,7 @@ class AsyncInferenceClient:
  Whether to clean up the potential extra spaces in the text output.
  generate_parameters (`Dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.
- truncation (`Literal["do_not_truncate", "longest_first", "only_first", "only_second"]`, *optional*):
+ truncation (`"SummarizationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
  Returns:
  [`SummarizationOutput`]: The generated summary text.
@@ -1714,7 +1714,9 @@ class AsyncInferenceClient:
  query: str,
  *,
  model: Optional[str] = None,
- parameters: Optional[Dict[str, Any]] = None,
+ padding: Optional["Padding"] = None,
+ sequential: Optional[bool] = None,
+ truncation: Optional[bool] = None,
  ) -> TableQuestionAnsweringOutputElement:
  """
  Retrieve the answer to a question from information given in a table.
@@ -1728,8 +1730,14 @@ class AsyncInferenceClient:
  model (`str`):
  The model to use for the table-question-answering task. Can be a model ID hosted on the Hugging Face
  Hub or a URL to a deployed Inference Endpoint.
- parameters (`Dict[str, Any]`, *optional*):
- Additional inference parameters. Defaults to None.
+ padding (`"Padding"`, *optional*):
+ Activates and controls padding.
+ sequential (`bool`, *optional*):
+ Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
+ inference to be done sequentially to extract relations within sequences, given their conversational
+ nature.
+ truncation (`bool`, *optional*):
+ Activates and controls truncation.

  Returns:
  [`TableQuestionAnsweringOutputElement`]: a table question answering output containing the answer, coordinates, cells and the aggregator used.
@@ -1751,6 +1759,11 @@ class AsyncInferenceClient:
  TableQuestionAnsweringOutputElement(answer='36542', coordinates=[[0, 1]], cells=['36542'], aggregator='AVERAGE')
  ```
  """
+ parameters = {
+ "padding": padding,
+ "sequential": sequential,
+ "truncation": truncation,
+ }
  inputs = {
  "query": query,
  "table": table,
@@ -1875,7 +1888,7 @@ class AsyncInferenceClient:
  top_k (`int`, *optional*):
  When specified, limits the output to the top K most probable classes.
  function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
- The function to apply to the output.
+ The function to apply to the model outputs in order to retrieve the scores.

  Returns:
  `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2136,7 +2149,7 @@ class AsyncInferenceClient:
  grammar ([`TextGenerationInputGrammarType`], *optional*):
  Grammar constraints. Can be either a JSONSchema or a regex.
  max_new_tokens (`int`, *optional*):
- Maximum number of generated tokens
+ Maximum number of generated tokens. Defaults to 100.
  repetition_penalty (`float`, *optional*):
  The parameter for repetition penalty. 1.0 means no penalty. See [this
  paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
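The docstring now states the default of 100 generated tokens. A brief sketch of passing `max_new_tokens` explicitly; the model id is a placeholder:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient("meta-llama/Llama-3.1-8B-Instruct")  # placeholder model id
    text = await client.text_generation(
        "Explain the difference between a list and a tuple in Python:",
        max_new_tokens=100,  # matches the newly documented default
    )
    print(text)

asyncio.run(main())
```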
@@ -2411,10 +2424,10 @@ class AsyncInferenceClient:
  self,
  prompt: str,
  *,
- negative_prompt: Optional[str] = None,
+ negative_prompt: Optional[List[str]] = None,
  height: Optional[float] = None,
  width: Optional[float] = None,
- num_inference_steps: Optional[float] = None,
+ num_inference_steps: Optional[int] = None,
  guidance_scale: Optional[float] = None,
  model: Optional[str] = None,
  scheduler: Optional[str] = None,
@@ -2434,8 +2447,8 @@ class AsyncInferenceClient:
  Args:
  prompt (`str`):
  The prompt to generate an image from.
- negative_prompt (`str`, *optional*):
- An optional negative prompt for the image generation.
+ negative_prompt (`List[str`, *optional*):
+ One or several prompt to guide what NOT to include in image generation.
  height (`float`, *optional*):
  The height in pixels of the image to generate.
  width (`float`, *optional*):
@@ -2444,8 +2457,8 @@ class AsyncInferenceClient:
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
  guidance_scale (`float`, *optional*):
- Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
- usually at the expense of lower image quality.
+ A higher guidance scale value encourages the model to generate images closely linked to the text
+ prompt, but values too high may cause saturation and other artifacts.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. If not provided, the default recommended text-to-image model will be used.
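For `text_to_image`, `negative_prompt` becomes a list of strings and `num_inference_steps` is typed as an int. A sketch with a placeholder model id and output path:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()
    image = await client.text_to_image(
        "An astronaut riding a horse on the moon",
        negative_prompt=["low resolution", "blurry"],      # now a list of strings
        num_inference_steps=30,                            # now an int
        guidance_scale=7.0,
        model="stabilityai/stable-diffusion-xl-base-1.0",  # placeholder model id
    )
    image.save("astronaut.png")  # the client returns a PIL image

asyncio.run(main())
```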
@@ -2533,44 +2546,42 @@ class AsyncInferenceClient:
  Defaults to None.
  do_sample (`bool`, *optional*):
  Whether to use sampling instead of greedy decoding when generating new tokens.
- early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"`, *optional*):
+ early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"]`, *optional*):
  Controls the stopping condition for beam-based methods.
  epsilon_cutoff (`float`, *optional*):
- If set to float strictly between 0 and 1, only tokens with a conditional probability
- greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
- 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
- Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+ If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
+ epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on
+ the size of the model. See [Truncation Sampling as Language Model
+ Desmoothing](https://hf.co/papers/2210.15191) for more details.
  eta_cutoff (`float`, *optional*):
- Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
- float strictly between 0 and 1, a token is only considered if it is greater than either
- eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
- term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
- the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
- See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
- for more details.
+ Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly
+ between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff)
+ * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token
+ probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3,
+ depending on the size of the model. See [Truncation Sampling as Language Model
+ Desmoothing](https://hf.co/papers/2210.15191) for more details.
  max_length (`int`, *optional*):
  The maximum length (in tokens) of the generated text, including the input.
  max_new_tokens (`int`, *optional*):
- The maximum number of tokens to generate. Takes precedence over maxLength.
+ The maximum number of tokens to generate. Takes precedence over max_length.
  min_length (`int`, *optional*):
  The minimum length (in tokens) of the generated text, including the input.
  min_new_tokens (`int`, *optional*):
- The minimum number of tokens to generate. Takes precedence over maxLength.
+ The minimum number of tokens to generate. Takes precedence over min_length.
  num_beam_groups (`int`, *optional*):
- Number of groups to divide num_beams into in order to ensure diversity among different
- groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+ Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
+ See [this paper](https://hf.co/papers/1610.02424) for more details.
  num_beams (`int`, *optional*):
  Number of beams to use for beam search.
  penalty_alpha (`float`, *optional*):
- The value balances the model confidence and the degeneration penalty in contrastive
- search decoding.
+ The value balances the model confidence and the degeneration penalty in contrastive search decoding.
  temperature (`float`, *optional*):
  The value used to modulate the next token probabilities.
  top_k (`int`, *optional*):
  The number of highest probability vocabulary tokens to keep for top-k-filtering.
  top_p (`float`, *optional*):
- If set to float < 1, only the smallest set of most probable tokens with probabilities
- that add up to top_p or higher are kept for generation.
+ If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
+ top_p or higher are kept for generation.
  typical_p (`float`, *optional*):
  Local typicality measures how similar the conditional probability of predicting a target token next is
  to the expected conditional probability of predicting a random token next, given the partial text
@@ -2627,7 +2638,7 @@ class AsyncInferenceClient:
  text: str,
  *,
  model: Optional[str] = None,
- aggregation_strategy: Optional[Literal["none", "simple", "first", "average", "max"]] = None,
+ aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
  ignore_labels: Optional[List[str]] = None,
  stride: Optional[int] = None,
  ) -> List[TokenClassificationOutputElement]:
@@ -2642,10 +2653,10 @@ class AsyncInferenceClient:
  The model to use for the token classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
  a deployed Inference Endpoint. If not provided, the default recommended token classification model will be used.
  Defaults to None.
- aggregation_strategy (`Literal["none", "simple", "first", "average", "max"]`, *optional*):
- The strategy used to fuse tokens based on model predictions.
- ignore_labels (`List[str]`, *optional*):
- A list of labels to ignore.
+ aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
+ The strategy used to fuse tokens based on model predictions
+ ignore_labels (`List[str`, *optional*):
+ A list of labels to ignore
  stride (`int`, *optional*):
  The number of overlapping tokens between chunks when splitting the input text.

@@ -2704,7 +2715,7 @@ class AsyncInferenceClient:
  src_lang: Optional[str] = None,
  tgt_lang: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
- truncation: Optional[Literal["do_not_truncate", "longest_first", "only_first", "only_second"]] = None,
+ truncation: Optional["TranslationTruncationStrategy"] = None,
  generate_parameters: Optional[Dict[str, Any]] = None,
  ) -> TranslationOutput:
  """
@@ -2728,7 +2739,7 @@ class AsyncInferenceClient:
  Target language to translate to. Required for models that can translate to multiple languages.
  clean_up_tokenization_spaces (`bool`, *optional*):
  Whether to clean up the potential extra spaces in the text output.
- truncation (`Literal["do_not_truncate", "longest_first", "only_first", "only_second"]`, *optional*):
+ truncation (`"TranslationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
  generate_parameters (`Dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.
@@ -2752,13 +2763,13 @@ class AsyncInferenceClient:
  >>> await client.translation("My name is Wolfgang and I live in Berlin")
  'Mein Name ist Wolfgang und ich lebe in Berlin.'
  >>> await client.translation("My name is Wolfgang and I live in Berlin", model="Helsinki-NLP/opus-mt-en-fr")
- TranslationOutput(translation_text='Je m\'appelle Wolfgang et je vis à Berlin.')
+ TranslationOutput(translation_text='Je m'appelle Wolfgang et je vis à Berlin.')
  ```

  Specifying languages:
  ```py
  >>> client.translation("My name is Sarah Jessica Parker but you can call me Jessica", model="facebook/mbart-large-50-many-to-many-mmt", src_lang="en_XX", tgt_lang="fr_XX")
- "Mon nom est Sarah Jessica Parker mais vous pouvez m\'appeler Jessica"
+ "Mon nom est Sarah Jessica Parker mais vous pouvez m'appeler Jessica"
  ```
  """
  # Throw error if only one of `src_lang` and `tgt_lang` was given
@@ -2799,9 +2810,8 @@ class AsyncInferenceClient:
  a deployed Inference Endpoint. If not provided, the default recommended visual question answering model will be used.
  Defaults to None.
  top_k (`int`, *optional*):
- The number of answers to return (will be chosen by order of likelihood). Note that we
- return less than topk answers if there are not enough options available within the
- context.
+ The number of answers to return (will be chosen by order of likelihood). Note that we return less than
+ topk answers if there are not enough options available within the context.
  Returns:
  `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

@@ -2832,14 +2842,22 @@ class AsyncInferenceClient:
  response = await self.post(json=payload, model=model, task="visual-question-answering")
  return VisualQuestionAnsweringOutputElement.parse_obj_as_list(response)

+ @_deprecate_arguments(
+ version="0.30.0",
+ deprecated_args=["labels"],
+ custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+ )
  async def zero_shot_classification(
  self,
  text: str,
- labels: List[str],
+ # temporarily keeping it optional for backward compatibility.
+ candidate_labels: List[str] = None, # type: ignore
  *,
- multi_label: bool = False,
+ multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
+ # deprecated argument
+ labels: List[str] = None, # type: ignore
  ) -> List[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.
@@ -2847,20 +2865,22 @@ class AsyncInferenceClient:
  Args:
  text (`str`):
  The input text to classify.
- labels (`List[str]`):
- List of strings. Each string is the verbalization of a possible label for the input text.
- multi_label (`bool`):
- Boolean. If True, the probability for each label is evaluated independently and multiple labels can have a probability close to 1 simultaneously or all probabilities can be close to 0.
- If False, the labels are considered mutually exclusive and the probability over all labels always sums to 1. Defaults to False.
+ candidate_labels (`List[str]`):
+ The set of possible class labels to classify the text into.
+ labels (`List[str]`, *optional*):
+ (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
+ multi_label (`bool`, *optional*):
+ Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
+ the label likelihoods for each sequence is 1. If true, the labels are considered independent and
+ probabilities are normalized for each candidate.
  hypothesis_template (`str`, *optional*):
- A template sentence string with curly brackets to which the label strings are added. The label strings are added at the position of the curly brackets "{}".
- Zero-shot classifiers are based on NLI models, which evaluate if a hypothesis is entailed in another text or not.
- For example, with hypothesis_template="This text is about {}." and labels=["economics", "politics"], the system internally creates the two hypotheses "This text is about economics." and "This text is about politics.".
- The model then evaluates for both hypotheses if they are entailed in the provided `text` or not.
+ The sentence used in conjunction with `candidate_labels` to attempt the text classification by
+ replacing the placeholder with the candidate labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.

+
  Returns:
  `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

@@ -2918,9 +2938,17 @@ class AsyncInferenceClient:
  ]
  ```
  """
-
+ # handle deprecation
+ if labels is not None:
+ if candidate_labels is not None:
+ raise ValueError(
+ "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+ )
+ candidate_labels = labels
+ elif candidate_labels is None:
+ raise ValueError("Must specify `candidate_labels`")
  parameters = {
- "candidate_labels": labels,
+ "candidate_labels": candidate_labels,
  "multi_label": multi_label,
  "hypothesis_template": hypothesis_template,
  }
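`labels` is deprecated in favor of `candidate_labels` (removal planned for 0.30.0), and the two cannot be mixed. A short sketch using the new argument with the default recommended model:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.zero_shot_classification(
        "The new GPU reduces training time by 40%.",
        candidate_labels=["hardware", "sports", "politics"],  # replaces deprecated `labels`
        multi_label=False,
    )
    for element in results:
        print(element.label, element.score)

asyncio.run(main())
```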
@@ -2936,13 +2964,21 @@ class AsyncInferenceClient:
  for label, score in zip(output["labels"], output["scores"])
  ]

+ @_deprecate_arguments(
+ version="0.30.0",
+ deprecated_args=["labels"],
+ custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+ )
  async def zero_shot_image_classification(
  self,
  image: ContentT,
- labels: List[str],
+ # temporarily keeping it optional for backward compatibility.
+ candidate_labels: List[str] = None, # type: ignore
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
+ # deprecated argument
+ labels: List[str] = None, # type: ignore
  ) -> List[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.
@@ -2950,14 +2986,17 @@ class AsyncInferenceClient:
  Args:
  image (`Union[str, Path, bytes, BinaryIO]`):
  The input image to caption. It can be raw bytes, an image file, or a URL to an online image.
- labels (`List[str]`):
- List of string possible labels. There must be at least 2 labels.
+ candidate_labels (`List[str]`):
+ The candidate labels for this image
+ labels (`List[str]`, *optional*):
+ (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
  hypothesis_template (`str`, *optional*):
- The sentence used in conjunction with `labels` to attempt the text classification by replacing the
- placeholder with the candidate labels.
+ The sentence used in conjunction with `candidate_labels` to attempt the image classification by
+ replacing the placeholder with the candidate labels.
+
  Returns:
  `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

@@ -2980,13 +3019,23 @@ class AsyncInferenceClient:
  [ZeroShotImageClassificationOutputElement(label='dog', score=0.956),...]
  ```
  """
+ # handle deprecation
+ if labels is not None:
+ if candidate_labels is not None:
+ raise ValueError(
+ "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+ )
+ candidate_labels = labels
+ elif candidate_labels is None:
+ raise ValueError("Must specify `candidate_labels`")
  # Raise ValueError if input is less than 2 labels
- if len(labels) < 2:
+ if len(candidate_labels) < 2:
  raise ValueError("You must specify at least 2 classes to compare.")
-
- inputs = {"image": _b64_encode(image), "candidateLabels": ",".join(labels)}
- parameters = {"hypothesis_template": hypothesis_template}
- payload = _prepare_payload(inputs, parameters=parameters)
+ parameters = {
+ "candidate_labels": candidate_labels,
+ "hypothesis_template": hypothesis_template,
+ }
+ payload = _prepare_payload(image, parameters=parameters, expect_binary=True)
  response = await self.post(
  **payload,
  model=model,
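`zero_shot_image_classification` gets the same rename, and the payload now sends `candidate_labels` as a regular parameter instead of a comma-joined `candidateLabels` string. A sketch with a placeholder image URL; at least two labels are still required:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.zero_shot_image_classification(
        "https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg",  # placeholder URL
        candidate_labels=["cat", "dog", "bird"],  # replaces deprecated `labels`
    )
    for element in results:
        print(element.label, element.score)

asyncio.run(main())
```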