huggingface-hub 0.26.4__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of huggingface-hub might be problematic.
Files changed (61)
  1. huggingface_hub/__init__.py +49 -23
  2. huggingface_hub/_commit_scheduler.py +30 -4
  3. huggingface_hub/_local_folder.py +0 -4
  4. huggingface_hub/_login.py +38 -54
  5. huggingface_hub/_snapshot_download.py +6 -3
  6. huggingface_hub/_tensorboard_logger.py +2 -3
  7. huggingface_hub/_upload_large_folder.py +1 -1
  8. huggingface_hub/errors.py +19 -0
  9. huggingface_hub/fastai_utils.py +3 -2
  10. huggingface_hub/file_download.py +10 -12
  11. huggingface_hub/hf_api.py +102 -498
  12. huggingface_hub/hf_file_system.py +274 -35
  13. huggingface_hub/hub_mixin.py +5 -25
  14. huggingface_hub/inference/_client.py +185 -136
  15. huggingface_hub/inference/_common.py +2 -2
  16. huggingface_hub/inference/_generated/_async_client.py +186 -137
  17. huggingface_hub/inference/_generated/types/__init__.py +31 -10
  18. huggingface_hub/inference/_generated/types/audio_classification.py +3 -5
  19. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +6 -9
  20. huggingface_hub/inference/_generated/types/chat_completion.py +8 -5
  21. huggingface_hub/inference/_generated/types/depth_estimation.py +1 -1
  22. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -6
  23. huggingface_hub/inference/_generated/types/feature_extraction.py +1 -1
  24. huggingface_hub/inference/_generated/types/fill_mask.py +2 -4
  25. huggingface_hub/inference/_generated/types/image_classification.py +3 -5
  26. huggingface_hub/inference/_generated/types/image_segmentation.py +2 -4
  27. huggingface_hub/inference/_generated/types/image_to_image.py +2 -4
  28. huggingface_hub/inference/_generated/types/image_to_text.py +6 -9
  29. huggingface_hub/inference/_generated/types/object_detection.py +2 -4
  30. huggingface_hub/inference/_generated/types/question_answering.py +2 -4
  31. huggingface_hub/inference/_generated/types/sentence_similarity.py +1 -1
  32. huggingface_hub/inference/_generated/types/summarization.py +2 -4
  33. huggingface_hub/inference/_generated/types/table_question_answering.py +21 -3
  34. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -4
  35. huggingface_hub/inference/_generated/types/text_classification.py +4 -10
  36. huggingface_hub/inference/_generated/types/text_to_audio.py +7 -10
  37. huggingface_hub/inference/_generated/types/text_to_image.py +2 -4
  38. huggingface_hub/inference/_generated/types/text_to_speech.py +7 -10
  39. huggingface_hub/inference/_generated/types/token_classification.py +11 -12
  40. huggingface_hub/inference/_generated/types/translation.py +2 -4
  41. huggingface_hub/inference/_generated/types/video_classification.py +3 -4
  42. huggingface_hub/inference/_generated/types/visual_question_answering.py +2 -5
  43. huggingface_hub/inference/_generated/types/zero_shot_classification.py +8 -18
  44. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +9 -19
  45. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +7 -9
  46. huggingface_hub/keras_mixin.py +3 -2
  47. huggingface_hub/lfs.py +2 -5
  48. huggingface_hub/repocard_data.py +4 -4
  49. huggingface_hub/serialization/__init__.py +2 -0
  50. huggingface_hub/serialization/_dduf.py +387 -0
  51. huggingface_hub/serialization/_torch.py +361 -14
  52. huggingface_hub/utils/_cache_manager.py +1 -1
  53. huggingface_hub/utils/_headers.py +9 -25
  54. huggingface_hub/utils/tqdm.py +15 -0
  55. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/METADATA +8 -3
  56. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/RECORD +60 -60
  57. huggingface_hub/_multi_commits.py +0 -306
  58. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/LICENSE +0 -0
  59. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/WHEEL +0 -0
  60. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/entry_points.txt +0 -0
  61. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/top_level.txt +0 -0
@@ -70,17 +70,24 @@ from huggingface_hub.inference._generated.types import (
  AutomaticSpeechRecognitionOutput,
  ChatCompletionInputGrammarType,
  ChatCompletionInputStreamOptions,
- ChatCompletionInputToolType,
+ ChatCompletionInputTool,
+ ChatCompletionInputToolChoiceClass,
+ ChatCompletionInputToolChoiceEnum,
  ChatCompletionOutput,
  ChatCompletionStreamOutput,
  DocumentQuestionAnsweringOutputElement,
  FillMaskOutputElement,
  ImageClassificationOutputElement,
+ ImageClassificationOutputTransform,
  ImageSegmentationOutputElement,
+ ImageSegmentationSubtask,
+ ImageToImageTargetSize,
  ImageToTextOutput,
  ObjectDetectionOutputElement,
+ Padding,
  QuestionAnsweringOutputElement,
  SummarizationOutput,
+ SummarizationTruncationStrategy,
  TableQuestionAnsweringOutputElement,
  TextClassificationOutputElement,
  TextClassificationOutputTransform,
@@ -89,9 +96,10 @@ from huggingface_hub.inference._generated.types import (
  TextGenerationStreamOutput,
  TextToImageTargetSize,
  TextToSpeechEarlyStoppingEnum,
+ TokenClassificationAggregationStrategy,
  TokenClassificationOutputElement,
- ToolElement,
  TranslationOutput,
+ TranslationTruncationStrategy,
  VisualQuestionAnsweringOutputElement,
  ZeroShotClassificationOutputElement,
  ZeroShotImageClassificationOutputElement,
@@ -178,7 +186,9 @@ class InferenceClient:

  self.model: Optional[str] = model
  self.token: Union[str, bool, None] = token if token is not None else api_key
- self.headers = CaseInsensitiveDict(build_hf_headers(token=self.token)) # 'authorization' + 'user-agent'
+ self.headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(
+     build_hf_headers(token=self.token) # 'authorization' + 'user-agent'
+ )
  if headers is not None:
      self.headers.update(headers)
  self.cookies = cookies
@@ -341,7 +351,7 @@ class InferenceClient:
  top_k (`int`, *optional*):
      When specified, limits the output to the top K most probable classes.
  function_to_apply (`"AudioClassificationOutputTransform"`, *optional*):
-     The function to apply to the output.
+     The function to apply to the model outputs in order to retrieve the scores.

  Returns:
      `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
@@ -466,9 +476,9 @@ class InferenceClient:
  stop: Optional[List[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
- tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ToolElement]] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
  ) -> ChatCompletionOutput: ...
@@ -491,9 +501,9 @@ class InferenceClient:
  stop: Optional[List[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
- tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ToolElement]] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
  ) -> Iterable[ChatCompletionStreamOutput]: ...
@@ -516,9 +526,9 @@ class InferenceClient:
  stop: Optional[List[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
- tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ToolElement]] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...
@@ -541,9 +551,9 @@ class InferenceClient:
  stop: Optional[List[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
- tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None,
+ tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ToolElement]] = None,
+ tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
@@ -582,7 +592,7 @@ class InferenceClient:
      Whether to return log probabilities of the output tokens or not. If true, returns the log
      probabilities of each output token returned in the content of message.
  max_tokens (`int`, *optional*):
-     Maximum number of tokens allowed in the response. Defaults to 20.
+     Maximum number of tokens allowed in the response. Defaults to 100.
  n (`int`, *optional*):
      UNUSED.
  presence_penalty (`float`, *optional*):
@@ -609,11 +619,11 @@ class InferenceClient:
  top_p (`float`, *optional*):
      Fraction of the most likely next words to sample from.
      Must be between 0 and 1. Defaults to 1.0.
- tool_choice ([`ChatCompletionInputToolType`] or `str`, *optional*):
+ tool_choice ([`ChatCompletionInputToolChoiceClass`] or [`ChatCompletionInputToolChoiceEnum`], *optional*):
      The tool to use for the completion. Defaults to "auto".
  tool_prompt (`str`, *optional*):
      A prompt to be appended before the tools.
- tools (List of [`ToolElement`], *optional*):
+ tools (List of [`ChatCompletionInputTool`], *optional*):
      A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
      provide a list of functions the model may generate JSON inputs for.
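
The renamed tool types change how a tools-enabled chat completion is spelled, but at call time tools are still passed as JSON-style dicts. A minimal sketch against the 0.27.0 client; the model ID and the function schema are illustrative, not part of this diff:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()  # picks up a locally saved token, if any

# Tool definitions are plain dicts; they are typed as the new
# ChatCompletionInputTool instead of the removed ToolElement.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",  # illustrative function name
            "description": "Get the current weather in a given city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat_completion(
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # illustrative model ID
    tools=tools,
    tool_choice="auto",  # a ChatCompletionInputToolChoiceEnum value
    max_tokens=100,      # note the documented default changed from 20 to 100
)
print(response.choices[0].message)
```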
@@ -939,28 +949,25 @@ class InferenceClient:
      a deployed Inference Endpoint. If not provided, the default recommended document question answering model will be used.
      Defaults to None.
  doc_stride (`int`, *optional*):
-     If the words in the document are too long to fit with the question for the model, it will
-     be split in several chunks with some overlap. This argument controls the size of that
-     overlap.
+     If the words in the document are too long to fit with the question for the model, it will be split in
+     several chunks with some overlap. This argument controls the size of that overlap.
  handle_impossible_answer (`bool`, *optional*):
-     Whether to accept impossible as an answer.
+     Whether to accept impossible as an answer
  lang (`str`, *optional*):
-     Language to use while running OCR.
+     Language to use while running OCR. Defaults to english.
  max_answer_len (`int`, *optional*):
-     The maximum length of predicted answers (e.g., only answers with a shorter length are
-     considered).
+     The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
  max_question_len (`int`, *optional*):
      The maximum length of the question after tokenization. It will be truncated if needed.
  max_seq_len (`int`, *optional*):
-     The maximum length of the total sentence (context + question) in tokens of each chunk
-     passed to the model. The context will be split in several chunks (using doc_stride as
-     overlap) if needed.
+     The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+     model. The context will be split in several chunks (using doc_stride as overlap) if needed.
  top_k (`int`, *optional*):
-     The number of answers to return (will be chosen by order of likelihood). Can return less
-     than top_k answers if there are not enough options available within the context.
- word_boxes (`List[Union[List[float], str]]`, *optional*):
-     A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
-     skip the OCR step and use the provided bounding boxes instead.
+     The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
+     answers if there are not enough options available within the context.
+ word_boxes (`List[Union[List[float], str`, *optional*):
+     A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
+     step and use the provided bounding boxes instead.
  Returns:
      `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

@@ -976,7 +983,7 @@ class InferenceClient:
  >>> from huggingface_hub import InferenceClient
  >>> client = InferenceClient()
  >>> client.document_question_answering(image="https://huggingface.co/spaces/impira/docquery/resolve/2359223c1837a7587402bda0f2643382a6eefeab/invoice.png", question="What is the invoice number?")
- [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16, words=None)]
+ [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
  ```
  """
  inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
@@ -1077,11 +1084,10 @@ class InferenceClient:
  model (`str`, *optional*):
      The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
      a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
- targets (`List[str]`, *optional*):
-     When passed, the model will limit the scores to the passed targets instead of looking up
-     in the whole vocabulary. If the provided targets are not in the model vocab, they will be
-     tokenized and the first resulting token will be used (with a warning, and that might be
-     slower).
+ targets (`List[str`, *optional*):
+     When passed, the model will limit the scores to the passed targets instead of looking up in the whole
+     vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
+     resulting token will be used (with a warning, and that might be slower).
  top_k (`int`, *optional*):
      When passed, overrides the number of predictions to return.
  Returns:
@@ -1115,7 +1121,7 @@ class InferenceClient:
  image: ContentT,
  *,
  model: Optional[str] = None,
- function_to_apply: Optional[Literal["sigmoid", "softmax", "none"]] = None,
+ function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
  top_k: Optional[int] = None,
  ) -> List[ImageClassificationOutputElement]:
  """
@@ -1127,8 +1133,8 @@ class InferenceClient:
  model (`str`, *optional*):
      The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
      deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
- function_to_apply (`Literal["sigmoid", "softmax", "none"]`, *optional*):
-     The function to apply to the output scores.
+ function_to_apply (`"ImageClassificationOutputTransform"`, *optional*):
+     The function to apply to the model outputs in order to retrieve the scores.
  top_k (`int`, *optional*):
      When specified, limits the output to the top K most probable classes.
  Returns:
@@ -1160,7 +1166,7 @@ class InferenceClient:
  model: Optional[str] = None,
  mask_threshold: Optional[float] = None,
  overlap_mask_area_threshold: Optional[float] = None,
- subtask: Optional[Literal["instance", "panoptic", "semantic"]] = None,
+ subtask: Optional["ImageSegmentationSubtask"] = None,
  threshold: Optional[float] = None,
  ) -> List[ImageSegmentationOutputElement]:
  """
@@ -1182,7 +1188,7 @@ class InferenceClient:
      Threshold to use when turning the predicted masks into binary values.
  overlap_mask_area_threshold (`float`, *optional*):
      Mask overlap threshold to eliminate small, disconnected segments.
- subtask (`Literal["instance", "panoptic", "semantic"]`, *optional*):
+ subtask (`"ImageSegmentationSubtask"`, *optional*):
      Segmentation task to be performed, depending on model capabilities.
  threshold (`float`, *optional*):
      Probability threshold to filter out predicted masks.
@@ -1221,12 +1227,11 @@ class InferenceClient:
  image: ContentT,
  prompt: Optional[str] = None,
  *,
- negative_prompt: Optional[str] = None,
- height: Optional[int] = None,
- width: Optional[int] = None,
+ negative_prompt: Optional[List[str]] = None,
  num_inference_steps: Optional[int] = None,
  guidance_scale: Optional[float] = None,
  model: Optional[str] = None,
+ target_size: Optional[ImageToImageTargetSize] = None,
  **kwargs,
  ) -> "Image":
  """
@@ -1243,21 +1248,19 @@ class InferenceClient:
      The input image for translation. It can be raw bytes, an image file, or a URL to an online image.
  prompt (`str`, *optional*):
      The text prompt to guide the image generation.
- negative_prompt (`str`, *optional*):
-     A negative prompt to guide the translation process.
- height (`int`, *optional*):
-     The height in pixels of the generated image.
- width (`int`, *optional*):
-     The width in pixels of the generated image.
+ negative_prompt (`List[str]`, *optional*):
+     One or several prompt to guide what NOT to include in image generation.
  num_inference_steps (`int`, *optional*):
-     The number of denoising steps. More denoising steps usually lead to a higher quality image at the
-     expense of slower inference.
+     For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+     quality image at the expense of slower inference.
  guidance_scale (`float`, *optional*):
-     Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-     usually at the expense of lower image quality.
+     For diffusion models. A higher guidance scale value encourages the model to generate images closely
+     linked to the text prompt at the expense of lower image quality.
  model (`str`, *optional*):
      The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
      Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+ target_size (`ImageToImageTargetSize`, *optional*):
+     The size in pixel of the output image.

  Returns:
      `Image`: The translated image.
@@ -1279,8 +1282,7 @@ class InferenceClient:
  parameters = {
      "prompt": prompt,
      "negative_prompt": negative_prompt,
-     "height": height,
-     "width": width,
+     "target_size": target_size,
      "num_inference_steps": num_inference_steps,
      "guidance_scale": guidance_scale,
      **kwargs,
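
With `height`/`width` folded into `target_size` and `negative_prompt` becoming a list, an image-to-image call now looks roughly like the sketch below. The file name and model ID are illustrative, and `target_size` is passed as a plain width/height dict on the assumption that it is serialized like the other JSON parameters (the annotated type is the new `ImageToImageTargetSize`):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()

image = client.image_to_image(
    "cat.png",                                  # illustrative input image
    prompt="turn the cat into a tiger",
    negative_prompt=["blurry", "low quality"],  # now a list of strings
    target_size={"width": 512, "height": 512},  # replaces height=/width= (assumed dict form)
    model="timbrooks/instruct-pix2pix",         # illustrative model ID
)
image.save("tiger.png")
```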
@@ -1481,26 +1483,24 @@ class InferenceClient:
      The model to use for the question answering task. Can be a model ID hosted on the Hugging Face Hub or a URL to
      a deployed Inference Endpoint.
  align_to_words (`bool`, *optional*):
-     Attempts to align the answer to real words. Improves quality on space separated
-     languages. Might hurt on non-space-separated languages (like Japanese or Chinese).
+     Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt
+     on non-space-separated languages (like Japanese or Chinese)
  doc_stride (`int`, *optional*):
-     If the context is too long to fit with the question for the model, it will be split in
-     several chunks with some overlap. This argument controls the size of that overlap.
+     If the context is too long to fit with the question for the model, it will be split in several chunks
+     with some overlap. This argument controls the size of that overlap.
  handle_impossible_answer (`bool`, *optional*):
      Whether to accept impossible as an answer.
  max_answer_len (`int`, *optional*):
-     The maximum length of predicted answers (e.g., only answers with a shorter length are
-     considered).
+     The maximum length of predicted answers (e.g., only answers with a shorter length are considered).
  max_question_len (`int`, *optional*):
      The maximum length of the question after tokenization. It will be truncated if needed.
  max_seq_len (`int`, *optional*):
-     The maximum length of the total sentence (context + question) in tokens of each chunk
-     passed to the model. The context will be split in several chunks (using docStride as
-     overlap) if needed.
+     The maximum length of the total sentence (context + question) in tokens of each chunk passed to the
+     model. The context will be split in several chunks (using docStride as overlap) if needed.
  top_k (`int`, *optional*):
-     The number of answers to return (will be chosen by order of likelihood). Note that we
-     return less than topk answers if there are not enough options available within the
-     context.
+     The number of answers to return (will be chosen by order of likelihood). Note that we return less than
+     topk answers if there are not enough options available within the context.
+
  Returns:
      Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
      When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
@@ -1602,7 +1602,7 @@ class InferenceClient:
  model: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  generate_parameters: Optional[Dict[str, Any]] = None,
- truncation: Optional[Literal["do_not_truncate", "longest_first", "only_first", "only_second"]] = None,
+ truncation: Optional["SummarizationTruncationStrategy"] = None,
  ) -> SummarizationOutput:
  """
  Generate a summary of a given text using a specified model.
@@ -1620,7 +1620,7 @@ class InferenceClient:
      Whether to clean up the potential extra spaces in the text output.
  generate_parameters (`Dict[str, Any]`, *optional*):
      Additional parametrization of the text generation algorithm.
- truncation (`Literal["do_not_truncate", "longest_first", "only_first", "only_second"]`, *optional*):
+ truncation (`"SummarizationTruncationStrategy"`, *optional*):
      The truncation strategy to use.
  Returns:
      [`SummarizationOutput`]: The generated summary text.
@@ -1655,7 +1655,9 @@ class InferenceClient:
  query: str,
  *,
  model: Optional[str] = None,
- parameters: Optional[Dict[str, Any]] = None,
+ padding: Optional["Padding"] = None,
+ sequential: Optional[bool] = None,
+ truncation: Optional[bool] = None,
  ) -> TableQuestionAnsweringOutputElement:
  """
  Retrieve the answer to a question from information given in a table.
@@ -1669,8 +1671,14 @@ class InferenceClient:
  model (`str`):
      The model to use for the table-question-answering task. Can be a model ID hosted on the Hugging Face
      Hub or a URL to a deployed Inference Endpoint.
- parameters (`Dict[str, Any]`, *optional*):
-     Additional inference parameters. Defaults to None.
+ padding (`"Padding"`, *optional*):
+     Activates and controls padding.
+ sequential (`bool`, *optional*):
+     Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
+     inference to be done sequentially to extract relations within sequences, given their conversational
+     nature.
+ truncation (`bool`, *optional*):
+     Activates and controls truncation.

  Returns:
      [`TableQuestionAnsweringOutputElement`]: a table question answering output containing the answer, coordinates, cells and the aggregator used.
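
The explicit `padding`, `sequential` and `truncation` keyword arguments replace the old free-form `parameters` dict, as the implementation hunk below shows. A short usage sketch reusing the table and model ID from the method's own docstring example:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()

table = {
    "Repository": ["Transformers", "Datasets", "Tokenizers"],
    "Stars": ["36542", "4512", "3934"],
}

# sequential/truncation (and padding) are now first-class arguments instead
# of entries in an opaque `parameters` dict.
answer = client.table_question_answering(
    table=table,
    query="How many stars does the transformers repo have?",
    model="google/tapas-base-finetuned-wtq",
    sequential=True,
    truncation=True,
)
print(answer.answer, answer.aggregator)
```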
@@ -1691,6 +1699,11 @@ class InferenceClient:
  TableQuestionAnsweringOutputElement(answer='36542', coordinates=[[0, 1]], cells=['36542'], aggregator='AVERAGE')
  ```
  """
+ parameters = {
+     "padding": padding,
+     "sequential": sequential,
+     "truncation": truncation,
+ }
  inputs = {
      "query": query,
      "table": table,
@@ -1813,7 +1826,7 @@ class InferenceClient:
  top_k (`int`, *optional*):
      When specified, limits the output to the top K most probable classes.
  function_to_apply (`"TextClassificationOutputTransform"`, *optional*):
-     The function to apply to the output.
+     The function to apply to the model outputs in order to retrieve the scores.

  Returns:
      `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
@@ -2073,7 +2086,7 @@ class InferenceClient:
  grammar ([`TextGenerationInputGrammarType`], *optional*):
      Grammar constraints. Can be either a JSONSchema or a regex.
  max_new_tokens (`int`, *optional*):
-     Maximum number of generated tokens
+     Maximum number of generated tokens. Defaults to 100.
  repetition_penalty (`float`, *optional*):
      The parameter for repetition penalty. 1.0 means no penalty. See [this
      paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
@@ -2347,10 +2360,10 @@ class InferenceClient:
  self,
  prompt: str,
  *,
- negative_prompt: Optional[str] = None,
+ negative_prompt: Optional[List[str]] = None,
  height: Optional[float] = None,
  width: Optional[float] = None,
- num_inference_steps: Optional[float] = None,
+ num_inference_steps: Optional[int] = None,
  guidance_scale: Optional[float] = None,
  model: Optional[str] = None,
  scheduler: Optional[str] = None,
@@ -2370,8 +2383,8 @@ class InferenceClient:
  Args:
  prompt (`str`):
      The prompt to generate an image from.
- negative_prompt (`str`, *optional*):
-     An optional negative prompt for the image generation.
+ negative_prompt (`List[str`, *optional*):
+     One or several prompt to guide what NOT to include in image generation.
  height (`float`, *optional*):
      The height in pixels of the image to generate.
  width (`float`, *optional*):
@@ -2380,8 +2393,8 @@ class InferenceClient:
      The number of denoising steps. More denoising steps usually lead to a higher quality image at the
      expense of slower inference.
  guidance_scale (`float`, *optional*):
-     Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
-     usually at the expense of lower image quality.
+     A higher guidance scale value encourages the model to generate images closely linked to the text
+     prompt, but values too high may cause saturation and other artifacts.
  model (`str`, *optional*):
      The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
      Inference Endpoint. If not provided, the default recommended text-to-image model will be used.
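
`text_to_image` follows the same convention: `negative_prompt` is now a list of strings and `num_inference_steps` is typed as an `int`. A minimal sketch with an illustrative model ID and prompts:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()

image = client.text_to_image(
    "An astronaut riding a horse on the moon.",
    negative_prompt=["blurry", "extra limbs"],  # now a list of strings
    num_inference_steps=25,                     # now typed as int
    guidance_scale=7.5,
    model="stabilityai/stable-diffusion-2-1",   # illustrative model ID
)
image.save("astronaut.png")
```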
@@ -2468,44 +2481,42 @@ class InferenceClient:
      Defaults to None.
  do_sample (`bool`, *optional*):
      Whether to use sampling instead of greedy decoding when generating new tokens.
- early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"`, *optional*):
+ early_stopping (`Union[bool, "TextToSpeechEarlyStoppingEnum"]`, *optional*):
      Controls the stopping condition for beam-based methods.
  epsilon_cutoff (`float`, *optional*):
-     If set to float strictly between 0 and 1, only tokens with a conditional probability
-     greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
-     3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
-     Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+     If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
+     epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on
+     the size of the model. See [Truncation Sampling as Language Model
+     Desmoothing](https://hf.co/papers/2210.15191) for more details.
  eta_cutoff (`float`, *optional*):
-     Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
-     float strictly between 0 and 1, a token is only considered if it is greater than either
-     eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
-     term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
-     the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
-     See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
-     for more details.
+     Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly
+     between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff)
+     * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token
+     probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3,
+     depending on the size of the model. See [Truncation Sampling as Language Model
+     Desmoothing](https://hf.co/papers/2210.15191) for more details.
  max_length (`int`, *optional*):
      The maximum length (in tokens) of the generated text, including the input.
  max_new_tokens (`int`, *optional*):
-     The maximum number of tokens to generate. Takes precedence over maxLength.
+     The maximum number of tokens to generate. Takes precedence over max_length.
  min_length (`int`, *optional*):
      The minimum length (in tokens) of the generated text, including the input.
  min_new_tokens (`int`, *optional*):
-     The minimum number of tokens to generate. Takes precedence over maxLength.
+     The minimum number of tokens to generate. Takes precedence over min_length.
  num_beam_groups (`int`, *optional*):
-     Number of groups to divide num_beams into in order to ensure diversity among different
-     groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+     Number of groups to divide num_beams into in order to ensure diversity among different groups of beams.
+     See [this paper](https://hf.co/papers/1610.02424) for more details.
  num_beams (`int`, *optional*):
      Number of beams to use for beam search.
  penalty_alpha (`float`, *optional*):
-     The value balances the model confidence and the degeneration penalty in contrastive
-     search decoding.
+     The value balances the model confidence and the degeneration penalty in contrastive search decoding.
  temperature (`float`, *optional*):
      The value used to modulate the next token probabilities.
  top_k (`int`, *optional*):
      The number of highest probability vocabulary tokens to keep for top-k-filtering.
  top_p (`float`, *optional*):
-     If set to float < 1, only the smallest set of most probable tokens with probabilities
-     that add up to top_p or higher are kept for generation.
+     If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
+     top_p or higher are kept for generation.
  typical_p (`float`, *optional*):
      Local typicality measures how similar the conditional probability of predicting a target token next is
      to the expected conditional probability of predicting a random token next, given the partial text
@@ -2561,7 +2572,7 @@ class InferenceClient:
  text: str,
  *,
  model: Optional[str] = None,
- aggregation_strategy: Optional[Literal["none", "simple", "first", "average", "max"]] = None,
+ aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
  ignore_labels: Optional[List[str]] = None,
  stride: Optional[int] = None,
  ) -> List[TokenClassificationOutputElement]:
@@ -2576,10 +2587,10 @@ class InferenceClient:
      The model to use for the token classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
      a deployed Inference Endpoint. If not provided, the default recommended token classification model will be used.
      Defaults to None.
- aggregation_strategy (`Literal["none", "simple", "first", "average", "max"]`, *optional*):
-     The strategy used to fuse tokens based on model predictions.
- ignore_labels (`List[str]`, *optional*):
-     A list of labels to ignore.
+ aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
+     The strategy used to fuse tokens based on model predictions
+ ignore_labels (`List[str`, *optional*):
+     A list of labels to ignore
  stride (`int`, *optional*):
      The number of overlapping tokens between chunks when splitting the input text.
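
For token classification only the annotation moved to the generated `TokenClassificationAggregationStrategy` alias; the accepted string values ("none", "simple", "first", "average", "max") are unchanged. A short sketch, with an illustrative NER model ID:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()

entities = client.token_classification(
    "My name is Sarah Jessica Parker but you can call me Jessica",
    model="dslim/bert-base-NER",    # illustrative model ID
    aggregation_strategy="simple",  # same strings as before, new type alias
)
for entity in entities:
    print(entity.entity_group, entity.word, entity.score)
```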
@@ -2637,7 +2648,7 @@ class InferenceClient:
  src_lang: Optional[str] = None,
  tgt_lang: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
- truncation: Optional[Literal["do_not_truncate", "longest_first", "only_first", "only_second"]] = None,
+ truncation: Optional["TranslationTruncationStrategy"] = None,
  generate_parameters: Optional[Dict[str, Any]] = None,
  ) -> TranslationOutput:
  """
@@ -2661,7 +2672,7 @@ class InferenceClient:
      Target language to translate to. Required for models that can translate to multiple languages.
  clean_up_tokenization_spaces (`bool`, *optional*):
      Whether to clean up the potential extra spaces in the text output.
- truncation (`Literal["do_not_truncate", "longest_first", "only_first", "only_second"]`, *optional*):
+ truncation (`"TranslationTruncationStrategy"`, *optional*):
      The truncation strategy to use.
  generate_parameters (`Dict[str, Any]`, *optional*):
      Additional parametrization of the text generation algorithm.
@@ -2684,13 +2695,13 @@ class InferenceClient:
  >>> client.translation("My name is Wolfgang and I live in Berlin")
  'Mein Name ist Wolfgang und ich lebe in Berlin.'
  >>> client.translation("My name is Wolfgang and I live in Berlin", model="Helsinki-NLP/opus-mt-en-fr")
- TranslationOutput(translation_text='Je m\'appelle Wolfgang et je vis à Berlin.')
+ TranslationOutput(translation_text='Je m'appelle Wolfgang et je vis à Berlin.')
  ```

  Specifying languages:
  ```py
  >>> client.translation("My name is Sarah Jessica Parker but you can call me Jessica", model="facebook/mbart-large-50-many-to-many-mmt", src_lang="en_XX", tgt_lang="fr_XX")
- "Mon nom est Sarah Jessica Parker mais vous pouvez m\'appeler Jessica"
+ "Mon nom est Sarah Jessica Parker mais vous pouvez m'appeler Jessica"
  ```
  """
  # Throw error if only one of `src_lang` and `tgt_lang` was given
@@ -2731,9 +2742,8 @@ class InferenceClient:
      a deployed Inference Endpoint. If not provided, the default recommended visual question answering model will be used.
      Defaults to None.
  top_k (`int`, *optional*):
-     The number of answers to return (will be chosen by order of likelihood). Note that we
-     return less than topk answers if there are not enough options available within the
-     context.
+     The number of answers to return (will be chosen by order of likelihood). Note that we return less than
+     topk answers if there are not enough options available within the context.
  Returns:
      `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

@@ -2763,14 +2773,22 @@ class InferenceClient:
  response = self.post(json=payload, model=model, task="visual-question-answering")
  return VisualQuestionAnsweringOutputElement.parse_obj_as_list(response)

+ @_deprecate_arguments(
+     version="0.30.0",
+     deprecated_args=["labels"],
+     custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+ )
  def zero_shot_classification(
      self,
      text: str,
-     labels: List[str],
+     # temporarily keeping it optional for backward compatibility.
+     candidate_labels: List[str] = None, # type: ignore
      *,
-     multi_label: bool = False,
+     multi_label: Optional[bool] = False,
      hypothesis_template: Optional[str] = None,
      model: Optional[str] = None,
+     # deprecated argument
+     labels: List[str] = None, # type: ignore
  ) -> List[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.
@@ -2778,20 +2796,22 @@ class InferenceClient:
  Args:
  text (`str`):
      The input text to classify.
- labels (`List[str]`):
-     List of strings. Each string is the verbalization of a possible label for the input text.
- multi_label (`bool`):
-     Boolean. If True, the probability for each label is evaluated independently and multiple labels can have a probability close to 1 simultaneously or all probabilities can be close to 0.
-     If False, the labels are considered mutually exclusive and the probability over all labels always sums to 1. Defaults to False.
+ candidate_labels (`List[str]`):
+     The set of possible class labels to classify the text into.
+ labels (`List[str]`, *optional*):
+     (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
+ multi_label (`bool`, *optional*):
+     Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
+     the label likelihoods for each sequence is 1. If true, the labels are considered independent and
+     probabilities are normalized for each candidate.
  hypothesis_template (`str`, *optional*):
-     A template sentence string with curly brackets to which the label strings are added. The label strings are added at the position of the curly brackets "{}".
-     Zero-shot classifiers are based on NLI models, which evaluate if a hypothesis is entailed in another text or not.
-     For example, with hypothesis_template="This text is about {}." and labels=["economics", "politics"], the system internally creates the two hypotheses "This text is about economics." and "This text is about politics.".
-     The model then evaluates for both hypotheses if they are entailed in the provided `text` or not.
+     The sentence used in conjunction with `candidate_labels` to attempt the text classification by
+     replacing the placeholder with the candidate labels.
  model (`str`, *optional*):
      The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
      Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot classification model will be used.

+
  Returns:
      `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

@@ -2847,9 +2867,17 @@ class InferenceClient:
  ]
  ```
  """
-
+ # handle deprecation
+ if labels is not None:
+     if candidate_labels is not None:
+         raise ValueError(
+             "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+         )
+     candidate_labels = labels
+ elif candidate_labels is None:
+     raise ValueError("Must specify `candidate_labels`")
  parameters = {
-     "candidate_labels": labels,
+     "candidate_labels": candidate_labels,
      "multi_label": multi_label,
      "hypothesis_template": hypothesis_template,
  }
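
In practice the rename only affects the keyword: calls that pass `labels=` keep working until huggingface_hub 0.30.0 (with a deprecation warning), while new code should pass `candidate_labels`. A sketch of both spellings, reusing the style of the method's docstring example:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()

text = "A new model offers an explanation for how the Galilean satellites formed."

# New spelling (0.27.0+)
results = client.zero_shot_classification(
    text,
    candidate_labels=["space & cosmos", "scientific discovery", "microbiology"],
    multi_label=False,
)
print(results)

# Old spelling: still accepted, but warns and will be removed in >= 0.30.0.
# client.zero_shot_classification(text, labels=["space & cosmos", "scientific discovery"])
```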
@@ -2865,13 +2893,21 @@ class InferenceClient:
  for label, score in zip(output["labels"], output["scores"])
  ]

+ @_deprecate_arguments(
+     version="0.30.0",
+     deprecated_args=["labels"],
+     custom_message="`labels`has been renamed to `candidate_labels` and will be removed in huggingface_hub>=0.30.0.",
+ )
  def zero_shot_image_classification(
      self,
      image: ContentT,
-     labels: List[str],
+     # temporarily keeping it optional for backward compatibility.
+     candidate_labels: List[str] = None, # type: ignore
      *,
      model: Optional[str] = None,
      hypothesis_template: Optional[str] = None,
+     # deprecated argument
+     labels: List[str] = None, # type: ignore
  ) -> List[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.
@@ -2879,14 +2915,17 @@ class InferenceClient:
  Args:
  image (`Union[str, Path, bytes, BinaryIO]`):
      The input image to caption. It can be raw bytes, an image file, or a URL to an online image.
- labels (`List[str]`):
-     List of string possible labels. There must be at least 2 labels.
+ candidate_labels (`List[str]`):
+     The candidate labels for this image
+ labels (`List[str]`, *optional*):
+     (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
      The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
      Inference Endpoint. This parameter overrides the model defined at the instance level. If not provided, the default recommended zero-shot image classification model will be used.
  hypothesis_template (`str`, *optional*):
-     The sentence used in conjunction with `labels` to attempt the text classification by replacing the
-     placeholder with the candidate labels.
+     The sentence used in conjunction with `candidate_labels` to attempt the image classification by
+     replacing the placeholder with the candidate labels.
+
  Returns:
      `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

@@ -2908,13 +2947,23 @@ class InferenceClient:
  [ZeroShotImageClassificationOutputElement(label='dog', score=0.956),...]
  ```
  """
+ # handle deprecation
+ if labels is not None:
+     if candidate_labels is not None:
+         raise ValueError(
+             "Cannot specify both `labels` and `candidate_labels`. Use `candidate_labels` instead."
+         )
+     candidate_labels = labels
+ elif candidate_labels is None:
+     raise ValueError("Must specify `candidate_labels`")
  # Raise ValueError if input is less than 2 labels
- if len(labels) < 2:
+ if len(candidate_labels) < 2:
      raise ValueError("You must specify at least 2 classes to compare.")
-
- inputs = {"image": _b64_encode(image), "candidateLabels": ",".join(labels)}
- parameters = {"hypothesis_template": hypothesis_template}
- payload = _prepare_payload(inputs, parameters=parameters)
+ parameters = {
+     "candidate_labels": candidate_labels,
+     "hypothesis_template": hypothesis_template,
+ }
+ payload = _prepare_payload(image, parameters=parameters, expect_binary=True)
  response = self.post(
      **payload,
      model=model,
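
The image variant follows the same pattern: `candidate_labels` replaces `labels`, and the labels are now sent as a regular `candidate_labels` parameter instead of a comma-joined `candidateLabels` string. A closing sketch with an illustrative image URL and model ID:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()

results = client.zero_shot_image_classification(
    "https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg",
    candidate_labels=["dog", "cat", "horse"],  # `labels=` still works until 0.30.0
    model="openai/clip-vit-base-patch32",      # illustrative model ID
)
print(results)
```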