huggingface-hub 0.25.2__py3-none-any.whl → 0.26.0rc0__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries, as those versions appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +45 -11
- huggingface_hub/_login.py +172 -33
- huggingface_hub/commands/user.py +125 -9
- huggingface_hub/constants.py +1 -1
- huggingface_hub/errors.py +6 -9
- huggingface_hub/file_download.py +2 -372
- huggingface_hub/hf_api.py +170 -13
- huggingface_hub/hf_file_system.py +3 -3
- huggingface_hub/hub_mixin.py +2 -1
- huggingface_hub/inference/_client.py +500 -145
- huggingface_hub/inference/_common.py +42 -4
- huggingface_hub/inference/_generated/_async_client.py +499 -144
- huggingface_hub/inference/_generated/types/__init__.py +37 -7
- huggingface_hub/inference/_generated/types/audio_classification.py +8 -5
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +9 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +23 -4
- huggingface_hub/inference/_generated/types/image_classification.py +8 -5
- huggingface_hub/inference/_generated/types/image_segmentation.py +9 -7
- huggingface_hub/inference/_generated/types/image_to_image.py +7 -5
- huggingface_hub/inference/_generated/types/image_to_text.py +4 -4
- huggingface_hub/inference/_generated/types/object_detection.py +11 -5
- huggingface_hub/inference/_generated/types/summarization.py +11 -13
- huggingface_hub/inference/_generated/types/text_classification.py +10 -5
- huggingface_hub/inference/_generated/types/text_generation.py +1 -0
- huggingface_hub/inference/_generated/types/text_to_audio.py +2 -2
- huggingface_hub/inference/_generated/types/text_to_image.py +9 -7
- huggingface_hub/inference/_generated/types/text_to_speech.py +107 -0
- huggingface_hub/inference/_generated/types/translation.py +17 -11
- huggingface_hub/inference/_generated/types/video_classification.py +2 -2
- huggingface_hub/repocard.py +2 -1
- huggingface_hub/repocard_data.py +10 -2
- huggingface_hub/serialization/_torch.py +7 -4
- huggingface_hub/utils/__init__.py +4 -20
- huggingface_hub/utils/{_token.py → _auth.py} +86 -3
- huggingface_hub/utils/_headers.py +1 -1
- huggingface_hub/utils/_hf_folder.py +1 -1
- huggingface_hub/utils/_http.py +10 -4
- huggingface_hub/utils/_runtime.py +1 -10
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/METADATA +12 -12
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/RECORD +44 -44
- huggingface_hub/inference/_templating.py +0 -102
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/top_level.txt +0 -0
|
@@ -6,10 +6,12 @@
|
|
|
6
6
|
from .audio_classification import (
|
|
7
7
|
AudioClassificationInput,
|
|
8
8
|
AudioClassificationOutputElement,
|
|
9
|
+
AudioClassificationOutputTransform,
|
|
9
10
|
AudioClassificationParameters,
|
|
10
11
|
)
|
|
11
12
|
from .audio_to_audio import AudioToAudioInput, AudioToAudioOutputElement
|
|
12
13
|
from .automatic_speech_recognition import (
|
|
14
|
+
AutomaticSpeechRecognitionEarlyStoppingEnum,
|
|
13
15
|
AutomaticSpeechRecognitionGenerationParameters,
|
|
14
16
|
AutomaticSpeechRecognitionInput,
|
|
15
17
|
AutomaticSpeechRecognitionOutput,
|
|
@@ -24,8 +26,8 @@ from .chat_completion import (
|
|
|
24
26
|
ChatCompletionInputGrammarType,
|
|
25
27
|
ChatCompletionInputMessage,
|
|
26
28
|
ChatCompletionInputMessageChunk,
|
|
27
|
-
|
|
28
|
-
|
|
29
|
+
ChatCompletionInputStreamOptions,
|
|
30
|
+
ChatCompletionInputToolType,
|
|
29
31
|
ChatCompletionInputURL,
|
|
30
32
|
ChatCompletionOutput,
|
|
31
33
|
ChatCompletionOutputComplete,
|
|
@@ -44,6 +46,8 @@ from .chat_completion import (
|
|
|
44
46
|
ChatCompletionStreamOutputLogprob,
|
|
45
47
|
ChatCompletionStreamOutputLogprobs,
|
|
46
48
|
ChatCompletionStreamOutputTopLogprob,
|
|
49
|
+
ChatCompletionStreamOutputUsage,
|
|
50
|
+
ToolElement,
|
|
47
51
|
)
|
|
48
52
|
from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
|
|
49
53
|
from .document_question_answering import (
|
|
@@ -57,11 +61,18 @@ from .fill_mask import FillMaskInput, FillMaskOutputElement, FillMaskParameters
|
|
|
57
61
|
from .image_classification import (
|
|
58
62
|
ImageClassificationInput,
|
|
59
63
|
ImageClassificationOutputElement,
|
|
64
|
+
ImageClassificationOutputTransform,
|
|
60
65
|
ImageClassificationParameters,
|
|
61
66
|
)
|
|
62
67
|
from .image_segmentation import ImageSegmentationInput, ImageSegmentationOutputElement, ImageSegmentationParameters
|
|
63
68
|
from .image_to_image import ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToImageTargetSize
|
|
64
|
-
from .image_to_text import
|
|
69
|
+
from .image_to_text import (
|
|
70
|
+
ImageToTextEarlyStoppingEnum,
|
|
71
|
+
ImageToTextGenerationParameters,
|
|
72
|
+
ImageToTextInput,
|
|
73
|
+
ImageToTextOutput,
|
|
74
|
+
ImageToTextParameters,
|
|
75
|
+
)
|
|
65
76
|
from .object_detection import (
|
|
66
77
|
ObjectDetectionBoundingBox,
|
|
67
78
|
ObjectDetectionInput,
|
|
@@ -75,14 +86,19 @@ from .question_answering import (
|
|
|
75
86
|
QuestionAnsweringParameters,
|
|
76
87
|
)
|
|
77
88
|
from .sentence_similarity import SentenceSimilarityInput, SentenceSimilarityInputData
|
|
78
|
-
from .summarization import
|
|
89
|
+
from .summarization import SummarizationInput, SummarizationOutput, SummarizationParameters
|
|
79
90
|
from .table_question_answering import (
|
|
80
91
|
TableQuestionAnsweringInput,
|
|
81
92
|
TableQuestionAnsweringInputData,
|
|
82
93
|
TableQuestionAnsweringOutputElement,
|
|
83
94
|
)
|
|
84
95
|
from .text2text_generation import Text2TextGenerationInput, Text2TextGenerationOutput, Text2TextGenerationParameters
|
|
85
|
-
from .text_classification import
|
|
96
|
+
from .text_classification import (
|
|
97
|
+
TextClassificationInput,
|
|
98
|
+
TextClassificationOutputElement,
|
|
99
|
+
TextClassificationOutputTransform,
|
|
100
|
+
TextClassificationParameters,
|
|
101
|
+
)
|
|
86
102
|
from .text_generation import (
|
|
87
103
|
TextGenerationInput,
|
|
88
104
|
TextGenerationInputGenerateParameters,
|
|
@@ -96,17 +112,31 @@ from .text_generation import (
|
|
|
96
112
|
TextGenerationStreamOutputStreamDetails,
|
|
97
113
|
TextGenerationStreamOutputToken,
|
|
98
114
|
)
|
|
99
|
-
from .text_to_audio import
|
|
115
|
+
from .text_to_audio import (
|
|
116
|
+
TextToAudioEarlyStoppingEnum,
|
|
117
|
+
TextToAudioGenerationParameters,
|
|
118
|
+
TextToAudioInput,
|
|
119
|
+
TextToAudioOutput,
|
|
120
|
+
TextToAudioParameters,
|
|
121
|
+
)
|
|
100
122
|
from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters, TextToImageTargetSize
|
|
123
|
+
from .text_to_speech import (
|
|
124
|
+
TextToSpeechEarlyStoppingEnum,
|
|
125
|
+
TextToSpeechGenerationParameters,
|
|
126
|
+
TextToSpeechInput,
|
|
127
|
+
TextToSpeechOutput,
|
|
128
|
+
TextToSpeechParameters,
|
|
129
|
+
)
|
|
101
130
|
from .token_classification import (
|
|
102
131
|
TokenClassificationInput,
|
|
103
132
|
TokenClassificationOutputElement,
|
|
104
133
|
TokenClassificationParameters,
|
|
105
134
|
)
|
|
106
|
-
from .translation import
|
|
135
|
+
from .translation import TranslationInput, TranslationOutput, TranslationParameters
|
|
107
136
|
from .video_classification import (
|
|
108
137
|
VideoClassificationInput,
|
|
109
138
|
VideoClassificationOutputElement,
|
|
139
|
+
VideoClassificationOutputTransform,
|
|
110
140
|
VideoClassificationParameters,
|
|
111
141
|
)
|
|
112
142
|
from .visual_question_answering import (
|
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
|
|
5
5
|
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Literal, Optional
|
|
8
8
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
AudioClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
@@ -18,7 +18,8 @@ class AudioClassificationParameters(BaseInferenceType):
|
|
|
18
18
|
Additional inference parameters for Audio Classification
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
-
function_to_apply: Optional["
|
|
21
|
+
function_to_apply: Optional["AudioClassificationOutputTransform"] = None
|
|
22
|
+
"""The function to apply to the output."""
|
|
22
23
|
top_k: Optional[int] = None
|
|
23
24
|
"""When specified, limits the output to the top K most probable classes."""
|
|
24
25
|
|
|
@@ -27,8 +28,10 @@ class AudioClassificationParameters(BaseInferenceType):
|
|
|
27
28
|
class AudioClassificationInput(BaseInferenceType):
|
|
28
29
|
"""Inputs for Audio Classification inference"""
|
|
29
30
|
|
|
30
|
-
inputs:
|
|
31
|
-
"""The input audio data
|
|
31
|
+
inputs: str
|
|
32
|
+
"""The input audio data as a base64-encoded string. If no `parameters` are provided, you can
|
|
33
|
+
also provide the audio data as a raw bytes payload.
|
|
34
|
+
"""
|
|
32
35
|
parameters: Optional[AudioClassificationParameters] = None
|
|
33
36
|
"""Additional inference parameters"""
|
|
34
37
|
|
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
|
|
5
5
|
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import List, Literal, Optional, Union
|
|
8
8
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
AutomaticSpeechRecognitionEarlyStoppingEnum = Literal["never"]
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
@@ -20,7 +20,7 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
|
|
|
20
20
|
|
|
21
21
|
do_sample: Optional[bool] = None
|
|
22
22
|
"""Whether to use sampling instead of greedy decoding when generating new tokens."""
|
|
23
|
-
early_stopping: Optional[Union[bool, "
|
|
23
|
+
early_stopping: Optional[Union[bool, "AutomaticSpeechRecognitionEarlyStoppingEnum"]] = None
|
|
24
24
|
"""Controls the stopping condition for beam-based methods."""
|
|
25
25
|
epsilon_cutoff: Optional[float] = None
|
|
26
26
|
"""If set to float strictly between 0 and 1, only tokens with a conditional probability
|
|
@@ -40,11 +40,11 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
|
|
|
40
40
|
max_length: Optional[int] = None
|
|
41
41
|
"""The maximum length (in tokens) of the generated text, including the input."""
|
|
42
42
|
max_new_tokens: Optional[int] = None
|
|
43
|
-
"""The maximum number of tokens to generate. Takes precedence over
|
|
43
|
+
"""The maximum number of tokens to generate. Takes precedence over max_length."""
|
|
44
44
|
min_length: Optional[int] = None
|
|
45
45
|
"""The minimum length (in tokens) of the generated text, including the input."""
|
|
46
46
|
min_new_tokens: Optional[int] = None
|
|
47
|
-
"""The minimum number of tokens to generate. Takes precedence over
|
|
47
|
+
"""The minimum number of tokens to generate. Takes precedence over min_length."""
|
|
48
48
|
num_beam_groups: Optional[int] = None
|
|
49
49
|
"""Number of groups to divide num_beams into in order to ensure diversity among different
|
|
50
50
|
groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
|
|
@@ -90,8 +90,10 @@ class AutomaticSpeechRecognitionParameters(BaseInferenceType):
|
|
|
90
90
|
class AutomaticSpeechRecognitionInput(BaseInferenceType):
|
|
91
91
|
"""Inputs for Automatic Speech Recognition inference"""
|
|
92
92
|
|
|
93
|
-
inputs:
|
|
94
|
-
"""The input audio data
|
|
93
|
+
inputs: str
|
|
94
|
+
"""The input audio data as a base64-encoded string. If no `parameters` are provided, you can
|
|
95
|
+
also provide the audio data as a raw bytes payload.
|
|
96
|
+
"""
|
|
95
97
|
parameters: Optional[AutomaticSpeechRecognitionParameters] = None
|
|
96
98
|
"""Additional inference parameters"""
|
|
97
99
|
|
|
@@ -44,13 +44,23 @@ class ChatCompletionInputGrammarType(BaseInferenceType):
|
|
|
44
44
|
"""
|
|
45
45
|
|
|
46
46
|
|
|
47
|
+
@dataclass
|
|
48
|
+
class ChatCompletionInputStreamOptions(BaseInferenceType):
|
|
49
|
+
include_usage: bool
|
|
50
|
+
"""If set, an additional chunk will be streamed before the data: [DONE] message. The usage
|
|
51
|
+
field on this chunk shows the token usage statistics for the entire request, and the
|
|
52
|
+
choices field will always be an empty array. All other chunks will also include a usage
|
|
53
|
+
field, but with a null value.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
|
|
47
57
|
@dataclass
|
|
48
58
|
class ChatCompletionInputFunctionName(BaseInferenceType):
|
|
49
59
|
name: str
|
|
50
60
|
|
|
51
61
|
|
|
52
62
|
@dataclass
|
|
53
|
-
class
|
|
63
|
+
class ChatCompletionInputToolType(BaseInferenceType):
|
|
54
64
|
function: Optional[ChatCompletionInputFunctionName] = None
|
|
55
65
|
|
|
56
66
|
|
|
@@ -62,7 +72,7 @@ class ChatCompletionInputFunctionDefinition(BaseInferenceType):
|
|
|
62
72
|
|
|
63
73
|
|
|
64
74
|
@dataclass
|
|
65
|
-
class
|
|
75
|
+
class ToolElement(BaseInferenceType):
|
|
66
76
|
function: ChatCompletionInputFunctionDefinition
|
|
67
77
|
type: str
|
|
68
78
|
|
|
@@ -121,16 +131,17 @@ class ChatCompletionInput(BaseInferenceType):
|
|
|
121
131
|
stop: Optional[List[str]] = None
|
|
122
132
|
"""Up to 4 sequences where the API will stop generating further tokens."""
|
|
123
133
|
stream: Optional[bool] = None
|
|
134
|
+
stream_options: Optional[ChatCompletionInputStreamOptions] = None
|
|
124
135
|
temperature: Optional[float] = None
|
|
125
136
|
"""What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
|
|
126
137
|
output more random, while
|
|
127
138
|
lower values like 0.2 will make it more focused and deterministic.
|
|
128
139
|
We generally recommend altering this or `top_p` but not both.
|
|
129
140
|
"""
|
|
130
|
-
tool_choice: Optional[Union[
|
|
141
|
+
tool_choice: Optional[Union[ChatCompletionInputToolType, str]] = None
|
|
131
142
|
tool_prompt: Optional[str] = None
|
|
132
143
|
"""A prompt to be appended before the tools"""
|
|
133
|
-
tools: Optional[List[
|
|
144
|
+
tools: Optional[List[ToolElement]] = None
|
|
134
145
|
"""A list of tools the model may call. Currently, only functions are supported as a tool.
|
|
135
146
|
Use this to provide a list of
|
|
136
147
|
functions the model may generate JSON inputs for.
|
|
@@ -265,6 +276,13 @@ class ChatCompletionStreamOutputChoice(BaseInferenceType):
|
|
|
265
276
|
logprobs: Optional[ChatCompletionStreamOutputLogprobs] = None
|
|
266
277
|
|
|
267
278
|
|
|
279
|
+
@dataclass
|
|
280
|
+
class ChatCompletionStreamOutputUsage(BaseInferenceType):
|
|
281
|
+
completion_tokens: int
|
|
282
|
+
prompt_tokens: int
|
|
283
|
+
total_tokens: int
|
|
284
|
+
|
|
285
|
+
|
|
268
286
|
@dataclass
|
|
269
287
|
class ChatCompletionStreamOutput(BaseInferenceType):
|
|
270
288
|
"""Chat Completion Stream Output.
|
|
@@ -278,3 +296,4 @@ class ChatCompletionStreamOutput(BaseInferenceType):
|
|
|
278
296
|
id: str
|
|
279
297
|
model: str
|
|
280
298
|
system_fingerprint: str
|
|
299
|
+
usage: Optional[ChatCompletionStreamOutputUsage] = None
|
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
|
|
5
5
|
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Literal, Optional
|
|
8
8
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
ImageClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
@@ -18,7 +18,8 @@ class ImageClassificationParameters(BaseInferenceType):
|
|
|
18
18
|
Additional inference parameters for Image Classification
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
-
function_to_apply: Optional["
|
|
21
|
+
function_to_apply: Optional["ImageClassificationOutputTransform"] = None
|
|
22
|
+
"""The function to apply to the output."""
|
|
22
23
|
top_k: Optional[int] = None
|
|
23
24
|
"""When specified, limits the output to the top K most probable classes."""
|
|
24
25
|
|
|
@@ -27,8 +28,10 @@ class ImageClassificationParameters(BaseInferenceType):
|
|
|
27
28
|
class ImageClassificationInput(BaseInferenceType):
|
|
28
29
|
"""Inputs for Image Classification inference"""
|
|
29
30
|
|
|
30
|
-
inputs:
|
|
31
|
-
"""The input image data
|
|
31
|
+
inputs: str
|
|
32
|
+
"""The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
33
|
+
also provide the image data as a raw bytes payload.
|
|
34
|
+
"""
|
|
32
35
|
parameters: Optional[ImageClassificationParameters] = None
|
|
33
36
|
"""Additional inference parameters"""
|
|
34
37
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
|
|
5
5
|
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Literal, Optional
|
|
8
8
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
@@ -32,8 +32,10 @@ class ImageSegmentationParameters(BaseInferenceType):
|
|
|
32
32
|
class ImageSegmentationInput(BaseInferenceType):
|
|
33
33
|
"""Inputs for Image Segmentation inference"""
|
|
34
34
|
|
|
35
|
-
inputs:
|
|
36
|
-
"""The input image data
|
|
35
|
+
inputs: str
|
|
36
|
+
"""The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
37
|
+
also provide the image data as a raw bytes payload.
|
|
38
|
+
"""
|
|
37
39
|
parameters: Optional[ImageSegmentationParameters] = None
|
|
38
40
|
"""Additional inference parameters"""
|
|
39
41
|
|
|
@@ -45,8 +47,8 @@ class ImageSegmentationOutputElement(BaseInferenceType):
|
|
|
45
47
|
"""
|
|
46
48
|
|
|
47
49
|
label: str
|
|
48
|
-
"""The label of the predicted segment"""
|
|
49
|
-
mask:
|
|
50
|
-
"""The corresponding mask as a black-and-white image"""
|
|
50
|
+
"""The label of the predicted segment."""
|
|
51
|
+
mask: str
|
|
52
|
+
"""The corresponding mask as a black-and-white image (base64-encoded)."""
|
|
51
53
|
score: Optional[float] = None
|
|
52
|
-
"""The score or confidence
|
|
54
|
+
"""The score or confidence degree the model has."""
|
|
@@ -11,7 +11,7 @@ from .base import BaseInferenceType
|
|
|
11
11
|
|
|
12
12
|
@dataclass
|
|
13
13
|
class ImageToImageTargetSize(BaseInferenceType):
|
|
14
|
-
"""The size in pixel of the output image"""
|
|
14
|
+
"""The size in pixel of the output image."""
|
|
15
15
|
|
|
16
16
|
height: int
|
|
17
17
|
width: int
|
|
@@ -34,15 +34,17 @@ class ImageToImageParameters(BaseInferenceType):
|
|
|
34
34
|
a higher quality image at the expense of slower inference.
|
|
35
35
|
"""
|
|
36
36
|
target_size: Optional[ImageToImageTargetSize] = None
|
|
37
|
-
"""The size in pixel of the output image"""
|
|
37
|
+
"""The size in pixel of the output image."""
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
@dataclass
|
|
41
41
|
class ImageToImageInput(BaseInferenceType):
|
|
42
42
|
"""Inputs for Image To Image inference"""
|
|
43
43
|
|
|
44
|
-
inputs:
|
|
45
|
-
"""The input image data
|
|
44
|
+
inputs: str
|
|
45
|
+
"""The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
46
|
+
also provide the image data as a raw bytes payload.
|
|
47
|
+
"""
|
|
46
48
|
parameters: Optional[ImageToImageParameters] = None
|
|
47
49
|
"""Additional inference parameters"""
|
|
48
50
|
|
|
@@ -52,4 +54,4 @@ class ImageToImageOutput(BaseInferenceType):
|
|
|
52
54
|
"""Outputs of inference for the Image To Image task"""
|
|
53
55
|
|
|
54
56
|
image: Any
|
|
55
|
-
"""The output image"""
|
|
57
|
+
"""The output image returned as raw bytes in the payload."""
|
|
@@ -9,7 +9,7 @@ from typing import Any, Literal, Optional, Union
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
ImageToTextEarlyStoppingEnum = Literal["never"]
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
@@ -20,7 +20,7 @@ class ImageToTextGenerationParameters(BaseInferenceType):
|
|
|
20
20
|
|
|
21
21
|
do_sample: Optional[bool] = None
|
|
22
22
|
"""Whether to use sampling instead of greedy decoding when generating new tokens."""
|
|
23
|
-
early_stopping: Optional[Union[bool, "
|
|
23
|
+
early_stopping: Optional[Union[bool, "ImageToTextEarlyStoppingEnum"]] = None
|
|
24
24
|
"""Controls the stopping condition for beam-based methods."""
|
|
25
25
|
epsilon_cutoff: Optional[float] = None
|
|
26
26
|
"""If set to float strictly between 0 and 1, only tokens with a conditional probability
|
|
@@ -40,11 +40,11 @@ class ImageToTextGenerationParameters(BaseInferenceType):
|
|
|
40
40
|
max_length: Optional[int] = None
|
|
41
41
|
"""The maximum length (in tokens) of the generated text, including the input."""
|
|
42
42
|
max_new_tokens: Optional[int] = None
|
|
43
|
-
"""The maximum number of tokens to generate. Takes precedence over
|
|
43
|
+
"""The maximum number of tokens to generate. Takes precedence over max_length."""
|
|
44
44
|
min_length: Optional[int] = None
|
|
45
45
|
"""The minimum length (in tokens) of the generated text, including the input."""
|
|
46
46
|
min_new_tokens: Optional[int] = None
|
|
47
|
-
"""The minimum number of tokens to generate. Takes precedence over
|
|
47
|
+
"""The minimum number of tokens to generate. Takes precedence over min_length."""
|
|
48
48
|
num_beam_groups: Optional[int] = None
|
|
49
49
|
"""Number of groups to divide num_beams into in order to ensure diversity among different
|
|
50
50
|
groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
|
|
5
5
|
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import
|
|
7
|
+
from typing import Optional
|
|
8
8
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
@@ -23,8 +23,10 @@ class ObjectDetectionParameters(BaseInferenceType):
|
|
|
23
23
|
class ObjectDetectionInput(BaseInferenceType):
|
|
24
24
|
"""Inputs for Object Detection inference"""
|
|
25
25
|
|
|
26
|
-
inputs:
|
|
27
|
-
"""The input image data
|
|
26
|
+
inputs: str
|
|
27
|
+
"""The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
28
|
+
also provide the image data as a raw bytes payload.
|
|
29
|
+
"""
|
|
28
30
|
parameters: Optional[ObjectDetectionParameters] = None
|
|
29
31
|
"""Additional inference parameters"""
|
|
30
32
|
|
|
@@ -36,9 +38,13 @@ class ObjectDetectionBoundingBox(BaseInferenceType):
|
|
|
36
38
|
"""
|
|
37
39
|
|
|
38
40
|
xmax: int
|
|
41
|
+
"""The x-coordinate of the bottom-right corner of the bounding box."""
|
|
39
42
|
xmin: int
|
|
43
|
+
"""The x-coordinate of the top-left corner of the bounding box."""
|
|
40
44
|
ymax: int
|
|
45
|
+
"""The y-coordinate of the bottom-right corner of the bounding box."""
|
|
41
46
|
ymin: int
|
|
47
|
+
"""The y-coordinate of the top-left corner of the bounding box."""
|
|
42
48
|
|
|
43
49
|
|
|
44
50
|
@dataclass
|
|
@@ -50,6 +56,6 @@ class ObjectDetectionOutputElement(BaseInferenceType):
|
|
|
50
56
|
image.
|
|
51
57
|
"""
|
|
52
58
|
label: str
|
|
53
|
-
"""The predicted label for the bounding box"""
|
|
59
|
+
"""The predicted label for the bounding box."""
|
|
54
60
|
score: float
|
|
55
|
-
"""The associated score / probability"""
|
|
61
|
+
"""The associated score / probability."""
|
|
@@ -9,33 +9,31 @@ from typing import Any, Dict, Literal, Optional
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
SummarizationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
16
|
-
class
|
|
17
|
-
"""Additional inference parameters
|
|
18
|
-
Additional inference parameters for
|
|
16
|
+
class SummarizationParameters(BaseInferenceType):
|
|
17
|
+
"""Additional inference parameters.
|
|
18
|
+
Additional inference parameters for summarization.
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
clean_up_tokenization_spaces: Optional[bool] = None
|
|
22
22
|
"""Whether to clean up the potential extra spaces in the text output."""
|
|
23
23
|
generate_parameters: Optional[Dict[str, Any]] = None
|
|
24
|
-
"""Additional parametrization of the text generation algorithm"""
|
|
25
|
-
truncation: Optional["
|
|
26
|
-
"""The truncation strategy to use"""
|
|
24
|
+
"""Additional parametrization of the text generation algorithm."""
|
|
25
|
+
truncation: Optional["SummarizationTruncationStrategy"] = None
|
|
26
|
+
"""The truncation strategy to use."""
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
@dataclass
|
|
30
30
|
class SummarizationInput(BaseInferenceType):
|
|
31
|
-
"""Inputs for Summarization inference
|
|
32
|
-
Inputs for Text2text Generation inference
|
|
33
|
-
"""
|
|
31
|
+
"""Inputs for Summarization inference"""
|
|
34
32
|
|
|
35
33
|
inputs: str
|
|
36
|
-
"""The input text
|
|
37
|
-
parameters: Optional[
|
|
38
|
-
"""Additional inference parameters"""
|
|
34
|
+
"""The input text to summarize."""
|
|
35
|
+
parameters: Optional[SummarizationParameters] = None
|
|
36
|
+
"""Additional inference parameters."""
|
|
39
37
|
|
|
40
38
|
|
|
41
39
|
@dataclass
|
|
@@ -9,18 +9,23 @@ from typing import Literal, Optional
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
TextClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
16
16
|
class TextClassificationParameters(BaseInferenceType):
|
|
17
|
-
"""
|
|
18
|
-
Additional inference parameters for Text Classification
|
|
17
|
+
"""
|
|
18
|
+
Additional inference parameters for Text Classification.
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
|
-
function_to_apply: Optional["
|
|
21
|
+
function_to_apply: Optional["TextClassificationOutputTransform"] = None
|
|
22
|
+
"""
|
|
23
|
+
The function to apply to the output.
|
|
24
|
+
"""
|
|
22
25
|
top_k: Optional[int] = None
|
|
23
|
-
"""
|
|
26
|
+
"""
|
|
27
|
+
When specified, limits the output to the top K most probable classes.
|
|
28
|
+
"""
|
|
24
29
|
|
|
25
30
|
|
|
26
31
|
@dataclass
|
|
@@ -142,6 +142,7 @@ class TextGenerationOutput(BaseInferenceType):
|
|
|
142
142
|
class TextGenerationStreamOutputStreamDetails(BaseInferenceType):
|
|
143
143
|
finish_reason: "TextGenerationOutputFinishReason"
|
|
144
144
|
generated_tokens: int
|
|
145
|
+
input_length: int
|
|
145
146
|
seed: Optional[int] = None
|
|
146
147
|
|
|
147
148
|
|
|
@@ -9,7 +9,7 @@ from typing import Any, Literal, Optional, Union
|
|
|
9
9
|
from .base import BaseInferenceType
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
TextToAudioEarlyStoppingEnum = Literal["never"]
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dataclass
|
|
@@ -20,7 +20,7 @@ class TextToAudioGenerationParameters(BaseInferenceType):
|
|
|
20
20
|
|
|
21
21
|
do_sample: Optional[bool] = None
|
|
22
22
|
"""Whether to use sampling instead of greedy decoding when generating new tokens."""
|
|
23
|
-
early_stopping: Optional[Union[bool, "
|
|
23
|
+
early_stopping: Optional[Union[bool, "TextToAudioEarlyStoppingEnum"]] = None
|
|
24
24
|
"""Controls the stopping condition for beam-based methods."""
|
|
25
25
|
epsilon_cutoff: Optional[float] = None
|
|
26
26
|
"""If set to float strictly between 0 and 1, only tokens with a conditional probability
|
|
@@ -24,17 +24,19 @@ class TextToImageParameters(BaseInferenceType):
|
|
|
24
24
|
"""
|
|
25
25
|
|
|
26
26
|
guidance_scale: Optional[float] = None
|
|
27
|
-
"""
|
|
28
|
-
|
|
27
|
+
"""A higher guidance scale value encourages the model to generate images closely linked to
|
|
28
|
+
the text prompt, but values too high may cause saturation and other artifacts.
|
|
29
29
|
"""
|
|
30
30
|
negative_prompt: Optional[List[str]] = None
|
|
31
31
|
"""One or several prompt to guide what NOT to include in image generation."""
|
|
32
32
|
num_inference_steps: Optional[int] = None
|
|
33
|
-
"""
|
|
34
|
-
|
|
33
|
+
"""The number of denoising steps. More denoising steps usually lead to a higher quality
|
|
34
|
+
image at the expense of slower inference.
|
|
35
35
|
"""
|
|
36
36
|
scheduler: Optional[str] = None
|
|
37
|
-
"""
|
|
37
|
+
"""Override the scheduler with a compatible one."""
|
|
38
|
+
seed: Optional[int] = None
|
|
39
|
+
"""Seed for the random number generator."""
|
|
38
40
|
target_size: Optional[TextToImageTargetSize] = None
|
|
39
41
|
"""The size in pixel of the output image"""
|
|
40
42
|
|
|
@@ -44,7 +46,7 @@ class TextToImageInput(BaseInferenceType):
|
|
|
44
46
|
"""Inputs for Text To Image inference"""
|
|
45
47
|
|
|
46
48
|
inputs: str
|
|
47
|
-
"""The input text data (sometimes called "prompt
|
|
49
|
+
"""The input text data (sometimes called "prompt")"""
|
|
48
50
|
parameters: Optional[TextToImageParameters] = None
|
|
49
51
|
"""Additional inference parameters"""
|
|
50
52
|
|
|
@@ -54,4 +56,4 @@ class TextToImageOutput(BaseInferenceType):
|
|
|
54
56
|
"""Outputs of inference for the Text To Image task"""
|
|
55
57
|
|
|
56
58
|
image: Any
|
|
57
|
-
"""The generated image"""
|
|
59
|
+
"""The generated image returned as raw bytes in the payload."""
|