huggingface-hub 0.25.2__py3-none-any.whl → 0.26.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. See the registry's advisory page for more details.

Files changed (45)
  1. huggingface_hub/__init__.py +45 -11
  2. huggingface_hub/_login.py +172 -33
  3. huggingface_hub/commands/user.py +125 -9
  4. huggingface_hub/constants.py +1 -1
  5. huggingface_hub/errors.py +6 -9
  6. huggingface_hub/file_download.py +2 -372
  7. huggingface_hub/hf_api.py +170 -13
  8. huggingface_hub/hf_file_system.py +3 -3
  9. huggingface_hub/hub_mixin.py +2 -1
  10. huggingface_hub/inference/_client.py +500 -145
  11. huggingface_hub/inference/_common.py +42 -4
  12. huggingface_hub/inference/_generated/_async_client.py +499 -144
  13. huggingface_hub/inference/_generated/types/__init__.py +37 -7
  14. huggingface_hub/inference/_generated/types/audio_classification.py +8 -5
  15. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +9 -7
  16. huggingface_hub/inference/_generated/types/chat_completion.py +23 -4
  17. huggingface_hub/inference/_generated/types/image_classification.py +8 -5
  18. huggingface_hub/inference/_generated/types/image_segmentation.py +9 -7
  19. huggingface_hub/inference/_generated/types/image_to_image.py +7 -5
  20. huggingface_hub/inference/_generated/types/image_to_text.py +4 -4
  21. huggingface_hub/inference/_generated/types/object_detection.py +11 -5
  22. huggingface_hub/inference/_generated/types/summarization.py +11 -13
  23. huggingface_hub/inference/_generated/types/text_classification.py +10 -5
  24. huggingface_hub/inference/_generated/types/text_generation.py +1 -0
  25. huggingface_hub/inference/_generated/types/text_to_audio.py +2 -2
  26. huggingface_hub/inference/_generated/types/text_to_image.py +9 -7
  27. huggingface_hub/inference/_generated/types/text_to_speech.py +107 -0
  28. huggingface_hub/inference/_generated/types/translation.py +17 -11
  29. huggingface_hub/inference/_generated/types/video_classification.py +2 -2
  30. huggingface_hub/repocard.py +2 -1
  31. huggingface_hub/repocard_data.py +10 -2
  32. huggingface_hub/serialization/_torch.py +7 -4
  33. huggingface_hub/utils/__init__.py +4 -20
  34. huggingface_hub/utils/{_token.py → _auth.py} +86 -3
  35. huggingface_hub/utils/_headers.py +1 -1
  36. huggingface_hub/utils/_hf_folder.py +1 -1
  37. huggingface_hub/utils/_http.py +10 -4
  38. huggingface_hub/utils/_runtime.py +1 -10
  39. {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.1.dist-info}/METADATA +12 -12
  40. {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.1.dist-info}/RECORD +44 -44
  41. huggingface_hub/inference/_templating.py +0 -102
  42. {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.1.dist-info}/LICENSE +0 -0
  43. {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.1.dist-info}/WHEEL +0 -0
  44. {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.1.dist-info}/entry_points.txt +0 -0
  45. {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.1.dist-info}/top_level.txt +0 -0
@@ -6,10 +6,12 @@
6
6
  from .audio_classification import (
7
7
  AudioClassificationInput,
8
8
  AudioClassificationOutputElement,
9
+ AudioClassificationOutputTransform,
9
10
  AudioClassificationParameters,
10
11
  )
11
12
  from .audio_to_audio import AudioToAudioInput, AudioToAudioOutputElement
12
13
  from .automatic_speech_recognition import (
14
+ AutomaticSpeechRecognitionEarlyStoppingEnum,
13
15
  AutomaticSpeechRecognitionGenerationParameters,
14
16
  AutomaticSpeechRecognitionInput,
15
17
  AutomaticSpeechRecognitionOutput,
@@ -24,8 +26,8 @@ from .chat_completion import (
24
26
  ChatCompletionInputGrammarType,
25
27
  ChatCompletionInputMessage,
26
28
  ChatCompletionInputMessageChunk,
27
- ChatCompletionInputTool,
28
- ChatCompletionInputToolTypeClass,
29
+ ChatCompletionInputStreamOptions,
30
+ ChatCompletionInputToolType,
29
31
  ChatCompletionInputURL,
30
32
  ChatCompletionOutput,
31
33
  ChatCompletionOutputComplete,
@@ -44,6 +46,8 @@ from .chat_completion import (
44
46
  ChatCompletionStreamOutputLogprob,
45
47
  ChatCompletionStreamOutputLogprobs,
46
48
  ChatCompletionStreamOutputTopLogprob,
49
+ ChatCompletionStreamOutputUsage,
50
+ ToolElement,
47
51
  )
48
52
  from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
49
53
  from .document_question_answering import (
@@ -57,11 +61,18 @@ from .fill_mask import FillMaskInput, FillMaskOutputElement, FillMaskParameters
57
61
  from .image_classification import (
58
62
  ImageClassificationInput,
59
63
  ImageClassificationOutputElement,
64
+ ImageClassificationOutputTransform,
60
65
  ImageClassificationParameters,
61
66
  )
62
67
  from .image_segmentation import ImageSegmentationInput, ImageSegmentationOutputElement, ImageSegmentationParameters
63
68
  from .image_to_image import ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToImageTargetSize
64
- from .image_to_text import ImageToTextGenerationParameters, ImageToTextInput, ImageToTextOutput, ImageToTextParameters
69
+ from .image_to_text import (
70
+ ImageToTextEarlyStoppingEnum,
71
+ ImageToTextGenerationParameters,
72
+ ImageToTextInput,
73
+ ImageToTextOutput,
74
+ ImageToTextParameters,
75
+ )
65
76
  from .object_detection import (
66
77
  ObjectDetectionBoundingBox,
67
78
  ObjectDetectionInput,
@@ -75,14 +86,19 @@ from .question_answering import (
75
86
  QuestionAnsweringParameters,
76
87
  )
77
88
  from .sentence_similarity import SentenceSimilarityInput, SentenceSimilarityInputData
78
- from .summarization import SummarizationGenerationParameters, SummarizationInput, SummarizationOutput
89
+ from .summarization import SummarizationInput, SummarizationOutput, SummarizationParameters
79
90
  from .table_question_answering import (
80
91
  TableQuestionAnsweringInput,
81
92
  TableQuestionAnsweringInputData,
82
93
  TableQuestionAnsweringOutputElement,
83
94
  )
84
95
  from .text2text_generation import Text2TextGenerationInput, Text2TextGenerationOutput, Text2TextGenerationParameters
85
- from .text_classification import TextClassificationInput, TextClassificationOutputElement, TextClassificationParameters
96
+ from .text_classification import (
97
+ TextClassificationInput,
98
+ TextClassificationOutputElement,
99
+ TextClassificationOutputTransform,
100
+ TextClassificationParameters,
101
+ )
86
102
  from .text_generation import (
87
103
  TextGenerationInput,
88
104
  TextGenerationInputGenerateParameters,
@@ -96,17 +112,31 @@ from .text_generation import (
96
112
  TextGenerationStreamOutputStreamDetails,
97
113
  TextGenerationStreamOutputToken,
98
114
  )
99
- from .text_to_audio import TextToAudioGenerationParameters, TextToAudioInput, TextToAudioOutput, TextToAudioParameters
115
+ from .text_to_audio import (
116
+ TextToAudioEarlyStoppingEnum,
117
+ TextToAudioGenerationParameters,
118
+ TextToAudioInput,
119
+ TextToAudioOutput,
120
+ TextToAudioParameters,
121
+ )
100
122
  from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters, TextToImageTargetSize
123
+ from .text_to_speech import (
124
+ TextToSpeechEarlyStoppingEnum,
125
+ TextToSpeechGenerationParameters,
126
+ TextToSpeechInput,
127
+ TextToSpeechOutput,
128
+ TextToSpeechParameters,
129
+ )
101
130
  from .token_classification import (
102
131
  TokenClassificationInput,
103
132
  TokenClassificationOutputElement,
104
133
  TokenClassificationParameters,
105
134
  )
106
- from .translation import TranslationGenerationParameters, TranslationInput, TranslationOutput
135
+ from .translation import TranslationInput, TranslationOutput, TranslationParameters
107
136
  from .video_classification import (
108
137
  VideoClassificationInput,
109
138
  VideoClassificationOutputElement,
139
+ VideoClassificationOutputTransform,
110
140
  VideoClassificationParameters,
111
141
  )
112
142
  from .visual_question_answering import (
@@ -4,12 +4,12 @@
4
4
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
5
5
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
6
6
  from dataclasses import dataclass
7
- from typing import Any, Literal, Optional
7
+ from typing import Literal, Optional
8
8
 
9
9
  from .base import BaseInferenceType
10
10
 
11
11
 
12
- ClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
12
+ AudioClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
13
13
 
14
14
 
15
15
  @dataclass
@@ -18,7 +18,8 @@ class AudioClassificationParameters(BaseInferenceType):
18
18
  Additional inference parameters for Audio Classification
19
19
  """
20
20
 
21
- function_to_apply: Optional["ClassificationOutputTransform"] = None
21
+ function_to_apply: Optional["AudioClassificationOutputTransform"] = None
22
+ """The function to apply to the output."""
22
23
  top_k: Optional[int] = None
23
24
  """When specified, limits the output to the top K most probable classes."""
24
25
 
@@ -27,8 +28,10 @@ class AudioClassificationParameters(BaseInferenceType):
27
28
  class AudioClassificationInput(BaseInferenceType):
28
29
  """Inputs for Audio Classification inference"""
29
30
 
30
- inputs: Any
31
- """The input audio data"""
31
+ inputs: str
32
+ """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
33
+ also provide the audio data as a raw bytes payload.
34
+ """
32
35
  parameters: Optional[AudioClassificationParameters] = None
33
36
  """Additional inference parameters"""
34
37
 
@@ -4,12 +4,12 @@
4
4
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
5
5
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
6
6
  from dataclasses import dataclass
7
- from typing import Any, List, Literal, Optional, Union
7
+ from typing import List, Literal, Optional, Union
8
8
 
9
9
  from .base import BaseInferenceType
10
10
 
11
11
 
12
- EarlyStoppingEnum = Literal["never"]
12
+ AutomaticSpeechRecognitionEarlyStoppingEnum = Literal["never"]
13
13
 
14
14
 
15
15
  @dataclass
@@ -20,7 +20,7 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
20
20
 
21
21
  do_sample: Optional[bool] = None
22
22
  """Whether to use sampling instead of greedy decoding when generating new tokens."""
23
- early_stopping: Optional[Union[bool, "EarlyStoppingEnum"]] = None
23
+ early_stopping: Optional[Union[bool, "AutomaticSpeechRecognitionEarlyStoppingEnum"]] = None
24
24
  """Controls the stopping condition for beam-based methods."""
25
25
  epsilon_cutoff: Optional[float] = None
26
26
  """If set to float strictly between 0 and 1, only tokens with a conditional probability
@@ -40,11 +40,11 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
40
40
  max_length: Optional[int] = None
41
41
  """The maximum length (in tokens) of the generated text, including the input."""
42
42
  max_new_tokens: Optional[int] = None
43
- """The maximum number of tokens to generate. Takes precedence over maxLength."""
43
+ """The maximum number of tokens to generate. Takes precedence over max_length."""
44
44
  min_length: Optional[int] = None
45
45
  """The minimum length (in tokens) of the generated text, including the input."""
46
46
  min_new_tokens: Optional[int] = None
47
- """The minimum number of tokens to generate. Takes precedence over maxLength."""
47
+ """The minimum number of tokens to generate. Takes precedence over min_length."""
48
48
  num_beam_groups: Optional[int] = None
49
49
  """Number of groups to divide num_beams into in order to ensure diversity among different
50
50
  groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -90,8 +90,10 @@ class AutomaticSpeechRecognitionParameters(BaseInferenceType):
90
90
  class AutomaticSpeechRecognitionInput(BaseInferenceType):
91
91
  """Inputs for Automatic Speech Recognition inference"""
92
92
 
93
- inputs: Any
94
- """The input audio data"""
93
+ inputs: str
94
+ """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
95
+ also provide the audio data as a raw bytes payload.
96
+ """
95
97
  parameters: Optional[AutomaticSpeechRecognitionParameters] = None
96
98
  """Additional inference parameters"""
97
99
 
@@ -44,13 +44,23 @@ class ChatCompletionInputGrammarType(BaseInferenceType):
44
44
  """
45
45
 
46
46
 
47
+ @dataclass
48
+ class ChatCompletionInputStreamOptions(BaseInferenceType):
49
+ include_usage: bool
50
+ """If set, an additional chunk will be streamed before the data: [DONE] message. The usage
51
+ field on this chunk shows the token usage statistics for the entire request, and the
52
+ choices field will always be an empty array. All other chunks will also include a usage
53
+ field, but with a null value.
54
+ """
55
+
56
+
47
57
  @dataclass
48
58
  class ChatCompletionInputFunctionName(BaseInferenceType):
49
59
  name: str
50
60
 
51
61
 
52
62
  @dataclass
53
- class ChatCompletionInputToolTypeClass(BaseInferenceType):
63
+ class ChatCompletionInputToolType(BaseInferenceType):
54
64
  function: Optional[ChatCompletionInputFunctionName] = None
55
65
 
56
66
 
@@ -62,7 +72,7 @@ class ChatCompletionInputFunctionDefinition(BaseInferenceType):
62
72
 
63
73
 
64
74
  @dataclass
65
- class ChatCompletionInputTool(BaseInferenceType):
75
+ class ToolElement(BaseInferenceType):
66
76
  function: ChatCompletionInputFunctionDefinition
67
77
  type: str
68
78
 
@@ -121,16 +131,17 @@ class ChatCompletionInput(BaseInferenceType):
121
131
  stop: Optional[List[str]] = None
122
132
  """Up to 4 sequences where the API will stop generating further tokens."""
123
133
  stream: Optional[bool] = None
134
+ stream_options: Optional[ChatCompletionInputStreamOptions] = None
124
135
  temperature: Optional[float] = None
125
136
  """What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
126
137
  output more random, while
127
138
  lower values like 0.2 will make it more focused and deterministic.
128
139
  We generally recommend altering this or `top_p` but not both.
129
140
  """
130
- tool_choice: Optional[Union[ChatCompletionInputToolTypeClass, str]] = None
141
+ tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None
131
142
  tool_prompt: Optional[str] = None
132
143
  """A prompt to be appended before the tools"""
133
- tools: Optional[List[ChatCompletionInputTool]] = None
144
+ tools: Optional[List[ToolElement]] = None
134
145
  """A list of tools the model may call. Currently, only functions are supported as a tool.
135
146
  Use this to provide a list of
136
147
  functions the model may generate JSON inputs for.
@@ -265,6 +276,13 @@ class ChatCompletionStreamOutputChoice(BaseInferenceType):
265
276
  logprobs: Optional[ChatCompletionStreamOutputLogprobs] = None
266
277
 
267
278
 
279
+ @dataclass
280
+ class ChatCompletionStreamOutputUsage(BaseInferenceType):
281
+ completion_tokens: int
282
+ prompt_tokens: int
283
+ total_tokens: int
284
+
285
+
268
286
  @dataclass
269
287
  class ChatCompletionStreamOutput(BaseInferenceType):
270
288
  """Chat Completion Stream Output.
@@ -278,3 +296,4 @@ class ChatCompletionStreamOutput(BaseInferenceType):
278
296
  id: str
279
297
  model: str
280
298
  system_fingerprint: str
299
+ usage: Optional[ChatCompletionStreamOutputUsage] = None
@@ -4,12 +4,12 @@
4
4
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
5
5
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
6
6
  from dataclasses import dataclass
7
- from typing import Any, Literal, Optional
7
+ from typing import Literal, Optional
8
8
 
9
9
  from .base import BaseInferenceType
10
10
 
11
11
 
12
- ClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
12
+ ImageClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
13
13
 
14
14
 
15
15
  @dataclass
@@ -18,7 +18,8 @@ class ImageClassificationParameters(BaseInferenceType):
18
18
  Additional inference parameters for Image Classification
19
19
  """
20
20
 
21
- function_to_apply: Optional["ClassificationOutputTransform"] = None
21
+ function_to_apply: Optional["ImageClassificationOutputTransform"] = None
22
+ """The function to apply to the output."""
22
23
  top_k: Optional[int] = None
23
24
  """When specified, limits the output to the top K most probable classes."""
24
25
 
@@ -27,8 +28,10 @@ class ImageClassificationParameters(BaseInferenceType):
27
28
  class ImageClassificationInput(BaseInferenceType):
28
29
  """Inputs for Image Classification inference"""
29
30
 
30
- inputs: Any
31
- """The input image data"""
31
+ inputs: str
32
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
33
+ also provide the image data as a raw bytes payload.
34
+ """
32
35
  parameters: Optional[ImageClassificationParameters] = None
33
36
  """Additional inference parameters"""
34
37
 
@@ -4,7 +4,7 @@
4
4
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
5
5
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
6
6
  from dataclasses import dataclass
7
- from typing import Any, Literal, Optional
7
+ from typing import Literal, Optional
8
8
 
9
9
  from .base import BaseInferenceType
10
10
 
@@ -32,8 +32,10 @@ class ImageSegmentationParameters(BaseInferenceType):
32
32
  class ImageSegmentationInput(BaseInferenceType):
33
33
  """Inputs for Image Segmentation inference"""
34
34
 
35
- inputs: Any
36
- """The input image data"""
35
+ inputs: str
36
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
37
+ also provide the image data as a raw bytes payload.
38
+ """
37
39
  parameters: Optional[ImageSegmentationParameters] = None
38
40
  """Additional inference parameters"""
39
41
 
@@ -45,8 +47,8 @@ class ImageSegmentationOutputElement(BaseInferenceType):
45
47
  """
46
48
 
47
49
  label: str
48
- """The label of the predicted segment"""
49
- mask: Any
50
- """The corresponding mask as a black-and-white image"""
50
+ """The label of the predicted segment."""
51
+ mask: str
52
+ """The corresponding mask as a black-and-white image (base64-encoded)."""
51
53
  score: Optional[float] = None
52
- """The score or confidence degreee the model has"""
54
+ """The score or confidence degree the model has."""
@@ -11,7 +11,7 @@ from .base import BaseInferenceType
11
11
 
12
12
  @dataclass
13
13
  class ImageToImageTargetSize(BaseInferenceType):
14
- """The size in pixel of the output image"""
14
+ """The size in pixel of the output image."""
15
15
 
16
16
  height: int
17
17
  width: int
@@ -34,15 +34,17 @@ class ImageToImageParameters(BaseInferenceType):
34
34
  a higher quality image at the expense of slower inference.
35
35
  """
36
36
  target_size: Optional[ImageToImageTargetSize] = None
37
- """The size in pixel of the output image"""
37
+ """The size in pixel of the output image."""
38
38
 
39
39
 
40
40
  @dataclass
41
41
  class ImageToImageInput(BaseInferenceType):
42
42
  """Inputs for Image To Image inference"""
43
43
 
44
- inputs: Any
45
- """The input image data"""
44
+ inputs: str
45
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
46
+ also provide the image data as a raw bytes payload.
47
+ """
46
48
  parameters: Optional[ImageToImageParameters] = None
47
49
  """Additional inference parameters"""
48
50
 
@@ -52,4 +54,4 @@ class ImageToImageOutput(BaseInferenceType):
52
54
  """Outputs of inference for the Image To Image task"""
53
55
 
54
56
  image: Any
55
- """The output image"""
57
+ """The output image returned as raw bytes in the payload."""
@@ -9,7 +9,7 @@ from typing import Any, Literal, Optional, Union
9
9
  from .base import BaseInferenceType
10
10
 
11
11
 
12
- EarlyStoppingEnum = Literal["never"]
12
+ ImageToTextEarlyStoppingEnum = Literal["never"]
13
13
 
14
14
 
15
15
  @dataclass
@@ -20,7 +20,7 @@ class ImageToTextGenerationParameters(BaseInferenceType):
20
20
 
21
21
  do_sample: Optional[bool] = None
22
22
  """Whether to use sampling instead of greedy decoding when generating new tokens."""
23
- early_stopping: Optional[Union[bool, "EarlyStoppingEnum"]] = None
23
+ early_stopping: Optional[Union[bool, "ImageToTextEarlyStoppingEnum"]] = None
24
24
  """Controls the stopping condition for beam-based methods."""
25
25
  epsilon_cutoff: Optional[float] = None
26
26
  """If set to float strictly between 0 and 1, only tokens with a conditional probability
@@ -40,11 +40,11 @@ class ImageToTextGenerationParameters(BaseInferenceType):
40
40
  max_length: Optional[int] = None
41
41
  """The maximum length (in tokens) of the generated text, including the input."""
42
42
  max_new_tokens: Optional[int] = None
43
- """The maximum number of tokens to generate. Takes precedence over maxLength."""
43
+ """The maximum number of tokens to generate. Takes precedence over max_length."""
44
44
  min_length: Optional[int] = None
45
45
  """The minimum length (in tokens) of the generated text, including the input."""
46
46
  min_new_tokens: Optional[int] = None
47
- """The minimum number of tokens to generate. Takes precedence over maxLength."""
47
+ """The minimum number of tokens to generate. Takes precedence over min_length."""
48
48
  num_beam_groups: Optional[int] = None
49
49
  """Number of groups to divide num_beams into in order to ensure diversity among different
50
50
  groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
@@ -4,7 +4,7 @@
4
4
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
5
5
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
6
6
  from dataclasses import dataclass
7
- from typing import Any, Optional
7
+ from typing import Optional
8
8
 
9
9
  from .base import BaseInferenceType
10
10
 
@@ -23,8 +23,10 @@ class ObjectDetectionParameters(BaseInferenceType):
23
23
  class ObjectDetectionInput(BaseInferenceType):
24
24
  """Inputs for Object Detection inference"""
25
25
 
26
- inputs: Any
27
- """The input image data"""
26
+ inputs: str
27
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
28
+ also provide the image data as a raw bytes payload.
29
+ """
28
30
  parameters: Optional[ObjectDetectionParameters] = None
29
31
  """Additional inference parameters"""
30
32
 
@@ -36,9 +38,13 @@ class ObjectDetectionBoundingBox(BaseInferenceType):
36
38
  """
37
39
 
38
40
  xmax: int
41
+ """The x-coordinate of the bottom-right corner of the bounding box."""
39
42
  xmin: int
43
+ """The x-coordinate of the top-left corner of the bounding box."""
40
44
  ymax: int
45
+ """The y-coordinate of the bottom-right corner of the bounding box."""
41
46
  ymin: int
47
+ """The y-coordinate of the top-left corner of the bounding box."""
42
48
 
43
49
 
44
50
  @dataclass
@@ -50,6 +56,6 @@ class ObjectDetectionOutputElement(BaseInferenceType):
50
56
  image.
51
57
  """
52
58
  label: str
53
- """The predicted label for the bounding box"""
59
+ """The predicted label for the bounding box."""
54
60
  score: float
55
- """The associated score / probability"""
61
+ """The associated score / probability."""
@@ -9,33 +9,31 @@ from typing import Any, Dict, Literal, Optional
9
9
  from .base import BaseInferenceType
10
10
 
11
11
 
12
- SummarizationGenerationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]
12
+ SummarizationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]
13
13
 
14
14
 
15
15
  @dataclass
16
- class SummarizationGenerationParameters(BaseInferenceType):
17
- """Additional inference parameters
18
- Additional inference parameters for Text2text Generation
16
+ class SummarizationParameters(BaseInferenceType):
17
+ """Additional inference parameters.
18
+ Additional inference parameters for summarization.
19
19
  """
20
20
 
21
21
  clean_up_tokenization_spaces: Optional[bool] = None
22
22
  """Whether to clean up the potential extra spaces in the text output."""
23
23
  generate_parameters: Optional[Dict[str, Any]] = None
24
- """Additional parametrization of the text generation algorithm"""
25
- truncation: Optional["SummarizationGenerationTruncationStrategy"] = None
26
- """The truncation strategy to use"""
24
+ """Additional parametrization of the text generation algorithm."""
25
+ truncation: Optional["SummarizationTruncationStrategy"] = None
26
+ """The truncation strategy to use."""
27
27
 
28
28
 
29
29
  @dataclass
30
30
  class SummarizationInput(BaseInferenceType):
31
- """Inputs for Summarization inference
32
- Inputs for Text2text Generation inference
33
- """
31
+ """Inputs for Summarization inference"""
34
32
 
35
33
  inputs: str
36
- """The input text data"""
37
- parameters: Optional[SummarizationGenerationParameters] = None
38
- """Additional inference parameters"""
34
+ """The input text to summarize."""
35
+ parameters: Optional[SummarizationParameters] = None
36
+ """Additional inference parameters."""
39
37
 
40
38
 
41
39
  @dataclass
@@ -9,18 +9,23 @@ from typing import Literal, Optional
9
9
  from .base import BaseInferenceType
10
10
 
11
11
 
12
- ClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
12
+ TextClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
13
13
 
14
14
 
15
15
  @dataclass
16
16
  class TextClassificationParameters(BaseInferenceType):
17
- """Additional inference parameters
18
- Additional inference parameters for Text Classification
17
+ """
18
+ Additional inference parameters for Text Classification.
19
19
  """
20
20
 
21
- function_to_apply: Optional["ClassificationOutputTransform"] = None
21
+ function_to_apply: Optional["TextClassificationOutputTransform"] = None
22
+ """
23
+ The function to apply to the output.
24
+ """
22
25
  top_k: Optional[int] = None
23
- """When specified, limits the output to the top K most probable classes."""
26
+ """
27
+ When specified, limits the output to the top K most probable classes.
28
+ """
24
29
 
25
30
 
26
31
  @dataclass
@@ -142,6 +142,7 @@ class TextGenerationOutput(BaseInferenceType):
142
142
  class TextGenerationStreamOutputStreamDetails(BaseInferenceType):
143
143
  finish_reason: "TextGenerationOutputFinishReason"
144
144
  generated_tokens: int
145
+ input_length: int
145
146
  seed: Optional[int] = None
146
147
 
147
148
 
@@ -9,7 +9,7 @@ from typing import Any, Literal, Optional, Union
9
9
  from .base import BaseInferenceType
10
10
 
11
11
 
12
- EarlyStoppingEnum = Literal["never"]
12
+ TextToAudioEarlyStoppingEnum = Literal["never"]
13
13
 
14
14
 
15
15
  @dataclass
@@ -20,7 +20,7 @@ class TextToAudioGenerationParameters(BaseInferenceType):
20
20
 
21
21
  do_sample: Optional[bool] = None
22
22
  """Whether to use sampling instead of greedy decoding when generating new tokens."""
23
- early_stopping: Optional[Union[bool, "EarlyStoppingEnum"]] = None
23
+ early_stopping: Optional[Union[bool, "TextToAudioEarlyStoppingEnum"]] = None
24
24
  """Controls the stopping condition for beam-based methods."""
25
25
  epsilon_cutoff: Optional[float] = None
26
26
  """If set to float strictly between 0 and 1, only tokens with a conditional probability
@@ -24,17 +24,19 @@ class TextToImageParameters(BaseInferenceType):
24
24
  """
25
25
 
26
26
  guidance_scale: Optional[float] = None
27
- """For diffusion models. A higher guidance scale value encourages the model to generate
28
- images closely linked to the text prompt at the expense of lower image quality.
27
+ """A higher guidance scale value encourages the model to generate images closely linked to
28
+ the text prompt, but values too high may cause saturation and other artifacts.
29
29
  """
30
30
  negative_prompt: Optional[List[str]] = None
31
31
  """One or several prompt to guide what NOT to include in image generation."""
32
32
  num_inference_steps: Optional[int] = None
33
- """For diffusion models. The number of denoising steps. More denoising steps usually lead to
34
- a higher quality image at the expense of slower inference.
33
+ """The number of denoising steps. More denoising steps usually lead to a higher quality
34
+ image at the expense of slower inference.
35
35
  """
36
36
  scheduler: Optional[str] = None
37
- """For diffusion models. Override the scheduler with a compatible one"""
37
+ """Override the scheduler with a compatible one."""
38
+ seed: Optional[int] = None
39
+ """Seed for the random number generator."""
38
40
  target_size: Optional[TextToImageTargetSize] = None
39
41
  """The size in pixel of the output image"""
40
42
 
@@ -44,7 +46,7 @@ class TextToImageInput(BaseInferenceType):
44
46
  """Inputs for Text To Image inference"""
45
47
 
46
48
  inputs: str
47
- """The input text data (sometimes called "prompt\""""
49
+ """The input text data (sometimes called "prompt")"""
48
50
  parameters: Optional[TextToImageParameters] = None
49
51
  """Additional inference parameters"""
50
52
 
@@ -54,4 +56,4 @@ class TextToImageOutput(BaseInferenceType):
54
56
  """Outputs of inference for the Text To Image task"""
55
57
 
56
58
  image: Any
57
- """The generated image"""
59
+ """The generated image returned as raw bytes in the payload."""