huggingface-hub 0.21.2__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +217 -1
- huggingface_hub/_commit_api.py +14 -15
- huggingface_hub/_inference_endpoints.py +12 -11
- huggingface_hub/_login.py +1 -0
- huggingface_hub/_multi_commits.py +1 -0
- huggingface_hub/_snapshot_download.py +9 -1
- huggingface_hub/_tensorboard_logger.py +1 -0
- huggingface_hub/_webhooks_payload.py +1 -0
- huggingface_hub/_webhooks_server.py +1 -0
- huggingface_hub/commands/_cli_utils.py +1 -0
- huggingface_hub/commands/delete_cache.py +1 -0
- huggingface_hub/commands/download.py +1 -0
- huggingface_hub/commands/env.py +1 -0
- huggingface_hub/commands/scan_cache.py +1 -0
- huggingface_hub/commands/upload.py +1 -0
- huggingface_hub/community.py +1 -0
- huggingface_hub/constants.py +3 -1
- huggingface_hub/errors.py +38 -0
- huggingface_hub/file_download.py +102 -95
- huggingface_hub/hf_api.py +47 -35
- huggingface_hub/hf_file_system.py +77 -3
- huggingface_hub/hub_mixin.py +230 -61
- huggingface_hub/inference/_client.py +554 -239
- huggingface_hub/inference/_common.py +195 -41
- huggingface_hub/inference/_generated/_async_client.py +558 -239
- huggingface_hub/inference/_generated/types/__init__.py +115 -0
- huggingface_hub/inference/_generated/types/audio_classification.py +43 -0
- huggingface_hub/inference/_generated/types/audio_to_audio.py +31 -0
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +116 -0
- huggingface_hub/inference/_generated/types/base.py +149 -0
- huggingface_hub/inference/_generated/types/chat_completion.py +106 -0
- huggingface_hub/inference/_generated/types/depth_estimation.py +29 -0
- huggingface_hub/inference/_generated/types/document_question_answering.py +85 -0
- huggingface_hub/inference/_generated/types/feature_extraction.py +19 -0
- huggingface_hub/inference/_generated/types/fill_mask.py +50 -0
- huggingface_hub/inference/_generated/types/image_classification.py +43 -0
- huggingface_hub/inference/_generated/types/image_segmentation.py +52 -0
- huggingface_hub/inference/_generated/types/image_to_image.py +55 -0
- huggingface_hub/inference/_generated/types/image_to_text.py +105 -0
- huggingface_hub/inference/_generated/types/object_detection.py +55 -0
- huggingface_hub/inference/_generated/types/question_answering.py +77 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +28 -0
- huggingface_hub/inference/_generated/types/summarization.py +46 -0
- huggingface_hub/inference/_generated/types/table_question_answering.py +45 -0
- huggingface_hub/inference/_generated/types/text2text_generation.py +45 -0
- huggingface_hub/inference/_generated/types/text_classification.py +43 -0
- huggingface_hub/inference/_generated/types/text_generation.py +161 -0
- huggingface_hub/inference/_generated/types/text_to_audio.py +105 -0
- huggingface_hub/inference/_generated/types/text_to_image.py +57 -0
- huggingface_hub/inference/_generated/types/token_classification.py +53 -0
- huggingface_hub/inference/_generated/types/translation.py +46 -0
- huggingface_hub/inference/_generated/types/video_classification.py +47 -0
- huggingface_hub/inference/_generated/types/visual_question_answering.py +53 -0
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +56 -0
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +51 -0
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +55 -0
- huggingface_hub/inference/_templating.py +105 -0
- huggingface_hub/inference/_types.py +4 -152
- huggingface_hub/keras_mixin.py +39 -17
- huggingface_hub/lfs.py +20 -8
- huggingface_hub/repocard.py +11 -3
- huggingface_hub/repocard_data.py +12 -2
- huggingface_hub/serialization/__init__.py +1 -0
- huggingface_hub/serialization/_base.py +1 -0
- huggingface_hub/serialization/_numpy.py +1 -0
- huggingface_hub/serialization/_tensorflow.py +1 -0
- huggingface_hub/serialization/_torch.py +1 -0
- huggingface_hub/utils/__init__.py +4 -1
- huggingface_hub/utils/_cache_manager.py +7 -0
- huggingface_hub/utils/_chunk_utils.py +1 -0
- huggingface_hub/utils/_datetime.py +1 -0
- huggingface_hub/utils/_errors.py +10 -1
- huggingface_hub/utils/_experimental.py +1 -0
- huggingface_hub/utils/_fixes.py +19 -3
- huggingface_hub/utils/_git_credential.py +1 -0
- huggingface_hub/utils/_headers.py +10 -3
- huggingface_hub/utils/_hf_folder.py +1 -0
- huggingface_hub/utils/_http.py +1 -0
- huggingface_hub/utils/_pagination.py +1 -0
- huggingface_hub/utils/_paths.py +1 -0
- huggingface_hub/utils/_runtime.py +22 -0
- huggingface_hub/utils/_subprocess.py +1 -0
- huggingface_hub/utils/_token.py +1 -0
- huggingface_hub/utils/_typing.py +29 -1
- huggingface_hub/utils/_validators.py +1 -0
- huggingface_hub/utils/endpoint_helpers.py +1 -0
- huggingface_hub/utils/logging.py +1 -1
- huggingface_hub/utils/sha.py +1 -0
- huggingface_hub/utils/tqdm.py +1 -0
- {huggingface_hub-0.21.2.dist-info → huggingface_hub-0.22.0.dist-info}/METADATA +14 -15
- huggingface_hub-0.22.0.dist-info/RECORD +113 -0
- {huggingface_hub-0.21.2.dist-info → huggingface_hub-0.22.0.dist-info}/WHEEL +1 -1
- huggingface_hub/inference/_text_generation.py +0 -551
- huggingface_hub-0.21.2.dist-info/RECORD +0 -81
- {huggingface_hub-0.21.2.dist-info → huggingface_hub-0.22.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.21.2.dist-info → huggingface_hub-0.22.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.21.2.dist-info → huggingface_hub-0.22.0.dist-info}/top_level.txt +0 -0

huggingface_hub/inference/_generated/types/__init__.py

@@ -0,0 +1,115 @@
+# This file is auto-generated by `utils/generate_inference_types.py`.
+# Do not modify it manually.
+#
+# ruff: noqa: F401
+
+from .audio_classification import (
+    AudioClassificationInput,
+    AudioClassificationOutputElement,
+    AudioClassificationParameters,
+)
+from .audio_to_audio import AudioToAudioInput, AudioToAudioOutputElement
+from .automatic_speech_recognition import (
+    AutomaticSpeechRecognitionGenerationParameters,
+    AutomaticSpeechRecognitionInput,
+    AutomaticSpeechRecognitionOutput,
+    AutomaticSpeechRecognitionOutputChunk,
+    AutomaticSpeechRecognitionParameters,
+)
+from .base import BaseInferenceType
+from .chat_completion import (
+    ChatCompletionInput,
+    ChatCompletionInputMessage,
+    ChatCompletionOutput,
+    ChatCompletionOutputChoice,
+    ChatCompletionOutputChoiceMessage,
+    ChatCompletionStreamOutput,
+    ChatCompletionStreamOutputChoice,
+    ChatCompletionStreamOutputDelta,
+)
+from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
+from .document_question_answering import (
+    DocumentQuestionAnsweringInput,
+    DocumentQuestionAnsweringInputData,
+    DocumentQuestionAnsweringOutputElement,
+    DocumentQuestionAnsweringParameters,
+)
+from .feature_extraction import FeatureExtractionInput
+from .fill_mask import FillMaskInput, FillMaskOutputElement, FillMaskParameters
+from .image_classification import (
+    ImageClassificationInput,
+    ImageClassificationOutputElement,
+    ImageClassificationParameters,
+)
+from .image_segmentation import ImageSegmentationInput, ImageSegmentationOutputElement, ImageSegmentationParameters
+from .image_to_image import ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToImageTargetSize
+from .image_to_text import ImageToTextGenerationParameters, ImageToTextInput, ImageToTextOutput, ImageToTextParameters
+from .object_detection import (
+    ObjectDetectionBoundingBox,
+    ObjectDetectionInput,
+    ObjectDetectionOutputElement,
+    ObjectDetectionParameters,
+)
+from .question_answering import (
+    QuestionAnsweringInput,
+    QuestionAnsweringInputData,
+    QuestionAnsweringOutputElement,
+    QuestionAnsweringParameters,
+)
+from .sentence_similarity import SentenceSimilarityInput, SentenceSimilarityInputData
+from .summarization import SummarizationGenerationParameters, SummarizationInput, SummarizationOutput
+from .table_question_answering import (
+    TableQuestionAnsweringInput,
+    TableQuestionAnsweringInputData,
+    TableQuestionAnsweringOutputElement,
+)
+from .text2text_generation import Text2TextGenerationInput, Text2TextGenerationOutput, Text2TextGenerationParameters
+from .text_classification import TextClassificationInput, TextClassificationOutputElement, TextClassificationParameters
+from .text_generation import (
+    TextGenerationInput,
+    TextGenerationOutput,
+    TextGenerationOutputDetails,
+    TextGenerationOutputSequenceDetails,
+    TextGenerationOutputToken,
+    TextGenerationParameters,
+    TextGenerationPrefillToken,
+    TextGenerationStreamDetails,
+    TextGenerationStreamOutput,
+)
+from .text_to_audio import TextToAudioGenerationParameters, TextToAudioInput, TextToAudioOutput, TextToAudioParameters
+from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters, TextToImageTargetSize
+from .token_classification import (
+    TokenClassificationInput,
+    TokenClassificationOutputElement,
+    TokenClassificationParameters,
+)
+from .translation import TranslationGenerationParameters, TranslationInput, TranslationOutput
+from .video_classification import (
+    VideoClassificationInput,
+    VideoClassificationOutputElement,
+    VideoClassificationParameters,
+)
+from .visual_question_answering import (
+    VisualQuestionAnsweringInput,
+    VisualQuestionAnsweringInputData,
+    VisualQuestionAnsweringOutputElement,
+    VisualQuestionAnsweringParameters,
+)
+from .zero_shot_classification import (
+    ZeroShotClassificationInput,
+    ZeroShotClassificationInputData,
+    ZeroShotClassificationOutputElement,
+    ZeroShotClassificationParameters,
+)
+from .zero_shot_image_classification import (
+    ZeroShotImageClassificationInput,
+    ZeroShotImageClassificationInputData,
+    ZeroShotImageClassificationOutputElement,
+    ZeroShotImageClassificationParameters,
+)
+from .zero_shot_object_detection import (
+    ZeroShotObjectDetectionBoundingBox,
+    ZeroShotObjectDetectionInput,
+    ZeroShotObjectDetectionInputData,
+    ZeroShotObjectDetectionOutputElement,
+)
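
Taken together, the new `types` package gives every inference task a typed import surface. As a minimal sketch (assuming huggingface-hub 0.22.0 is installed; the module path is the one added in the hunk above, and the `huggingface_hub/__init__.py +217 -1` entry suggests, but does not show, matching top-level re-exports):

# Sketch: importing the generated inference types added in this release.
from huggingface_hub.inference._generated.types import (
    AudioClassificationOutputElement,
    BaseInferenceType,
    ChatCompletionOutput,
)

# Every generated type is a dataclass that also subclasses dict (see base.py below).
assert issubclass(ChatCompletionOutput, BaseInferenceType)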

huggingface_hub/inference/_generated/types/audio_classification.py

@@ -0,0 +1,43 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, Literal, Optional
+
+from .base import BaseInferenceType
+
+
+ClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]
+
+
+@dataclass
+class AudioClassificationParameters(BaseInferenceType):
+    """Additional inference parameters
+    Additional inference parameters for Audio Classification
+    """
+
+    function_to_apply: Optional["ClassificationOutputTransform"] = None
+    top_k: Optional[int] = None
+    """When specified, limits the output to the top K most probable classes."""
+
+
+@dataclass
+class AudioClassificationInput(BaseInferenceType):
+    """Inputs for Audio Classification inference"""
+
+    inputs: Any
+    """The input audio data"""
+    parameters: Optional[AudioClassificationParameters] = None
+    """Additional inference parameters"""
+
+
+@dataclass
+class AudioClassificationOutputElement(BaseInferenceType):
+    """Outputs for Audio Classification inference"""
+
+    label: str
+    """The predicted class label."""
+    score: float
+    """The corresponding probability."""
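
As a sketch of how a generated output type is meant to be consumed, assuming a hypothetical audio-classification payload of the usual label/score shape (`parse_obj_as_list` is inherited from `BaseInferenceType`, defined later in this diff):

from huggingface_hub.inference._generated.types import AudioClassificationOutputElement

# Hypothetical server payload, for illustration only.
raw = b'[{"label": "dog", "score": 0.91}, {"label": "cat", "score": 0.09}]'

elements = AudioClassificationOutputElement.parse_obj_as_list(raw)
for element in elements:
    print(element.label, element.score)  # attribute access; dict-style access is deprecated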

huggingface_hub/inference/_generated/types/audio_to_audio.py

@@ -0,0 +1,31 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any
+
+from .base import BaseInferenceType
+
+
+@dataclass
+class AudioToAudioInput(BaseInferenceType):
+    """Inputs for Audio to Audio inference"""
+
+    inputs: Any
+    """The input audio data"""
+
+
+@dataclass
+class AudioToAudioOutputElement(BaseInferenceType):
+    """Outputs of inference for the Audio To Audio task
+    A generated audio file with its label.
+    """
+
+    blob: Any
+    """The generated audio file."""
+    content_type: str
+    """The content type of audio file."""
+    label: str
+    """The label of the audio file."""

huggingface_hub/inference/_generated/types/automatic_speech_recognition.py

@@ -0,0 +1,116 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, List, Literal, Optional, Union
+
+from .base import BaseInferenceType
+
+
+EarlyStoppingEnum = Literal["never"]
+
+
+@dataclass
+class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
+    """Parametrization of the text generation process
+    Ad-hoc parametrization of the text generation process
+    """
+
+    do_sample: Optional[bool] = None
+    """Whether to use sampling instead of greedy decoding when generating new tokens."""
+    early_stopping: Optional[Union[bool, "EarlyStoppingEnum"]] = None
+    """Controls the stopping condition for beam-based methods."""
+    epsilon_cutoff: Optional[float] = None
+    """If set to float strictly between 0 and 1, only tokens with a conditional probability
+    greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+    3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+    Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+    """
+    eta_cutoff: Optional[float] = None
+    """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+    float strictly between 0 and 1, a token is only considered if it is greater than either
+    eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+    term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+    the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+    See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+    for more details.
+    """
+    max_length: Optional[int] = None
+    """The maximum length (in tokens) of the generated text, including the input."""
+    max_new_tokens: Optional[int] = None
+    """The maximum number of tokens to generate. Takes precedence over maxLength."""
+    min_length: Optional[int] = None
+    """The minimum length (in tokens) of the generated text, including the input."""
+    min_new_tokens: Optional[int] = None
+    """The minimum number of tokens to generate. Takes precedence over minLength."""
+    num_beam_groups: Optional[int] = None
+    """Number of groups to divide num_beams into in order to ensure diversity among different
+    groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+    """
+    num_beams: Optional[int] = None
+    """Number of beams to use for beam search."""
+    penalty_alpha: Optional[float] = None
+    """The value balances the model confidence and the degeneration penalty in contrastive
+    search decoding.
+    """
+    temperature: Optional[float] = None
+    """The value used to modulate the next token probabilities."""
+    top_k: Optional[int] = None
+    """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+    top_p: Optional[float] = None
+    """If set to float < 1, only the smallest set of most probable tokens with probabilities
+    that add up to top_p or higher are kept for generation.
+    """
+    typical_p: Optional[float] = None
+    """Local typicality measures how similar the conditional probability of predicting a target
+    token next is to the expected conditional probability of predicting a random token next,
+    given the partial text already generated. If set to float < 1, the smallest set of the
+    most locally typical tokens with probabilities that add up to typical_p or higher are
+    kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+    """
+    use_cache: Optional[bool] = None
+    """Whether the model should use the past last key/values attentions to speed up decoding"""
+
+
+@dataclass
+class AutomaticSpeechRecognitionParameters(BaseInferenceType):
+    """Additional inference parameters
+    Additional inference parameters for Automatic Speech Recognition
+    """
+
+    generate: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
+    """Parametrization of the text generation process"""
+    return_timestamps: Optional[bool] = None
+    """Whether to output corresponding timestamps with the generated text"""
+
+
+@dataclass
+class AutomaticSpeechRecognitionInput(BaseInferenceType):
+    """Inputs for Automatic Speech Recognition inference"""
+
+    inputs: Any
+    """The input audio data"""
+    parameters: Optional[AutomaticSpeechRecognitionParameters] = None
+    """Additional inference parameters"""
+
+
+@dataclass
+class AutomaticSpeechRecognitionOutputChunk(BaseInferenceType):
+    text: str
+    """A chunk of text identified by the model"""
+    timestamps: List[float]
+    """The start and end timestamps corresponding with the text"""
+
+
+@dataclass
+class AutomaticSpeechRecognitionOutput(BaseInferenceType):
+    """Outputs of inference for the Automatic Speech Recognition task"""
+
+    text: str
+    """The recognized text."""
+    chunks: Optional[List[AutomaticSpeechRecognitionOutputChunk]] = None
+    """When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
+    the model.
+    """
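
A short sketch of how the nested parameter objects compose; the values below are illustrative assumptions, not recommended settings:

from huggingface_hub.inference._generated.types import (
    AutomaticSpeechRecognitionGenerationParameters,
    AutomaticSpeechRecognitionParameters,
)

# Illustrative request parameters: beam search plus timestamped chunks.
params = AutomaticSpeechRecognitionParameters(
    generate=AutomaticSpeechRecognitionGenerationParameters(
        num_beams=4,  # assumed value
        max_new_tokens=256,  # assumed value
    ),
    return_timestamps=True,
)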

huggingface_hub/inference/_generated/types/base.py

@@ -0,0 +1,149 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains a base class for all inference types."""
+
+import inspect
+import json
+import warnings
+from dataclasses import asdict, dataclass
+from typing import Any, Dict, List, Type, TypeVar, Union, get_args
+
+
+T = TypeVar("T", bound="BaseInferenceType")
+
+
+@dataclass
+class BaseInferenceType(dict):
+    """Base class for all inference types.
+
+    Object is a dataclass and a dict for backward compatibility, but the plan is to remove the dict part in the future.
+
+    Handle parsing from dict, list and json strings in a permissive way to ensure future-compatibility (e.g. all fields
+    are made optional, and non-expected fields are added as dict attributes).
+    """
+
+    @classmethod
+    def parse_obj_as_list(cls: Type[T], data: Union[bytes, str, List, Dict]) -> List[T]:
+        """Alias to parse server response and return a list of instances.
+
+        See `parse_obj` for more details.
+        """
+        output = cls.parse_obj(data)
+        if not isinstance(output, list):
+            raise ValueError(f"Invalid input data for {cls}. Expected a list, but got {type(output)}.")
+        return output
+
+    @classmethod
+    def parse_obj_as_instance(cls: Type[T], data: Union[bytes, str, List, Dict]) -> T:
+        """Alias to parse server response and return a single instance.
+
+        See `parse_obj` for more details.
+        """
+        output = cls.parse_obj(data)
+        if isinstance(output, list):
+            raise ValueError(f"Invalid input data for {cls}. Expected a single instance, but got a list.")
+        return output
+
+    @classmethod
+    def parse_obj(cls: Type[T], data: Union[bytes, str, List, Dict]) -> Union[List[T], T]:
+        """Parse server response as a dataclass or list of dataclasses.
+
+        To enable future-compatibility, we want to handle cases where the server returns more fields than expected.
+        In such cases, we don't want to raise an error but still create the dataclass object. Remaining fields are
+        added as dict attributes.
+        """
+        # Parse server response (from bytes)
+        if isinstance(data, bytes):
+            data = data.decode()
+        if isinstance(data, str):
+            data = json.loads(data)
+
+        # If a list, parse each item individually
+        if isinstance(data, List):
+            return [cls.parse_obj(d) for d in data]  # type: ignore [misc]
+
+        # At this point, we expect a dict
+        if not isinstance(data, dict):
+            raise ValueError(f"Invalid data type: {type(data)}")
+
+        init_values = {}
+        other_values = {}
+        for key, value in data.items():
+            key = normalize_key(key)
+            if key in cls.__dataclass_fields__ and cls.__dataclass_fields__[key].init:
+                if isinstance(value, dict) or isinstance(value, list):
+                    field_type = cls.__dataclass_fields__[key].type
+
+                    # if `field_type` is a `BaseInferenceType`, parse it
+                    if inspect.isclass(field_type) and issubclass(field_type, BaseInferenceType):
+                        value = field_type.parse_obj(value)
+
+                    # otherwise, recursively parse nested dataclasses (if possible)
+                    # `get_args` handles Union and Optional for us
+                    else:
+                        expected_types = get_args(field_type)
+                        for expected_type in expected_types:
+                            if getattr(expected_type, "_name", None) == "List":
+                                expected_type = get_args(expected_type)[
+                                    0
+                                ]  # assume same type for all items in the list
+                            if inspect.isclass(expected_type) and issubclass(expected_type, BaseInferenceType):
+                                value = expected_type.parse_obj(value)
+                                break
+                init_values[key] = value
+            else:
+                other_values[key] = value
+
+        # Make all missing fields default to None
+        # => ensure that dataclass initialization will never fail even if the server does not return all fields.
+        for key in cls.__dataclass_fields__:
+            if key not in init_values:
+                init_values[key] = None
+
+        # Initialize dataclass with expected values
+        item = cls(**init_values)
+
+        # Add remaining fields as dict attributes
+        item.update(other_values)
+        return item
+
+    def __post_init__(self):
+        self.update(asdict(self))
+
+    def __setitem__(self, __key: Any, __value: Any) -> None:
+        # Hacky way to keep dataclass values in sync when dict is updated
+        super().__setitem__(__key, __value)
+        if __key in self.__dataclass_fields__ and getattr(self, __key, None) != __value:
+            self.__setattr__(__key, __value)
+        return
+
+    def __setattr__(self, __name: str, __value: Any) -> None:
+        # Hacky way to keep dict values in sync when dataclass is updated
+        super().__setattr__(__name, __value)
+        if self.get(__name) != __value:
+            self[__name] = __value
+        return
+
+    def __getitem__(self, __key: Any) -> Any:
+        warnings.warn(
+            f"Accessing '{self.__class__.__name__}' values through dict is deprecated and "
+            "will be removed from version '0.25'. Use dataclass attributes instead.",
+            FutureWarning,
+        )
+        return super().__getitem__(__key)
+
+
+def normalize_key(key: str) -> str:
+    # e.g. "content-type" -> "content_type", "Accept" -> "accept"
+    return key.replace("-", "_").replace(" ", "_").lower()
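
The permissive parsing described in the docstrings can be seen end to end in a small sketch (the payload is hypothetical; `AudioClassificationOutputElement` is one of the generated subclasses from this diff):

from huggingface_hub.inference._generated.types import AudioClassificationOutputElement

# Hypothetical payload: the known field "score" is missing and an
# unexpected, dash-separated key is present.
payload = {"label": "dog", "extra-info": "anything"}

item = AudioClassificationOutputElement.parse_obj_as_instance(payload)
print(item.label)  # "dog" -- known keys become dataclass attributes
print(item.score)  # None  -- missing fields default to None instead of raising
# "extra-info" is normalized to "extra_info" and kept on the dict side,
# so nothing is lost if the server starts returning new fields.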

huggingface_hub/inference/_generated/types/chat_completion.py

@@ -0,0 +1,106 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import List, Literal, Optional, Union
+
+from .base import BaseInferenceType
+
+
+ChatCompletionMessageRole = Literal["assistant", "system", "user"]
+
+
+@dataclass
+class ChatCompletionInputMessage(BaseInferenceType):
+    content: str
+    """The content of the message."""
+    role: "ChatCompletionMessageRole"
+
+
+@dataclass
+class ChatCompletionInput(BaseInferenceType):
+    """Inputs for ChatCompletion inference"""
+
+    messages: List[ChatCompletionInputMessage]
+    frequency_penalty: Optional[float] = None
+    """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
+    frequency in the text so far, decreasing the model's likelihood to repeat the same line
+    verbatim.
+    """
+    max_tokens: Optional[int] = None
+    """The maximum number of tokens that can be generated in the chat completion."""
+    seed: Optional[int] = None
+    """The random sampling seed."""
+    stop: Optional[Union[List[str], str]] = None
+    """Stop generating tokens if a stop token is generated."""
+    stream: Optional[bool] = None
+    """If set, partial message deltas will be sent."""
+    temperature: Optional[float] = None
+    """The value used to modulate the logits distribution."""
+    top_p: Optional[float] = None
+    """If set to < 1, only the smallest set of most probable tokens with probabilities that add
+    up to `top_p` or higher are kept for generation.
+    """
+
+
+ChatCompletionFinishReason = Literal["length", "eos_token", "stop_sequence"]
+
+
+@dataclass
+class ChatCompletionOutputChoiceMessage(BaseInferenceType):
+    content: str
+    """The content of the chat completion message."""
+    role: "ChatCompletionMessageRole"
+
+
+@dataclass
+class ChatCompletionOutputChoice(BaseInferenceType):
+    finish_reason: "ChatCompletionFinishReason"
+    """The reason why the generation was stopped."""
+    index: int
+    """The index of the choice in the list of choices."""
+    message: ChatCompletionOutputChoiceMessage
+
+
+@dataclass
+class ChatCompletionOutput(BaseInferenceType):
+    """Outputs for Chat Completion inference"""
+
+    choices: List[ChatCompletionOutputChoice]
+    """A list of chat completion choices."""
+    created: int
+    """The Unix timestamp (in seconds) of when the chat completion was created."""
+
+
+@dataclass
+class ChatCompletionStreamOutputDelta(BaseInferenceType):
+    """A chat completion delta generated by streamed model responses."""
+
+    content: Optional[str] = None
+    """The contents of the chunk message."""
+    role: Optional[str] = None
+    """The role of the author of this message."""
+
+
+@dataclass
+class ChatCompletionStreamOutputChoice(BaseInferenceType):
+    delta: ChatCompletionStreamOutputDelta
+    """A chat completion delta generated by streamed model responses."""
+    index: int
+    """The index of the choice in the list of choices."""
+    finish_reason: Optional["ChatCompletionFinishReason"] = None
+    """The reason why the generation was stopped."""
+
+
+@dataclass
+class ChatCompletionStreamOutput(BaseInferenceType):
+    """Chat Completion Stream Output"""
+
+    choices: List[ChatCompletionStreamOutputChoice]
+    """A list of chat completion choices."""
+    created: int
+    """The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
+    the same timestamp.
+    """
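
A sketch of parsing a hypothetical, truncated chat-completion response into these types; nested dataclasses (choices, then messages) are resolved recursively by `BaseInferenceType.parse_obj`:

import json

from huggingface_hub.inference._generated.types import ChatCompletionOutput

# Hypothetical server response, reduced to the fields defined above.
response = json.dumps(
    {
        "created": 1710000000,
        "choices": [
            {
                "index": 0,
                "finish_reason": "eos_token",
                "message": {"role": "assistant", "content": "Hello!"},
            }
        ],
    }
)

output = ChatCompletionOutput.parse_obj_as_instance(response)
print(output.choices[0].message.content)  # "Hello!"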

huggingface_hub/inference/_generated/types/depth_estimation.py

@@ -0,0 +1,29 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+from .base import BaseInferenceType
+
+
+@dataclass
+class DepthEstimationInput(BaseInferenceType):
+    """Inputs for Depth Estimation inference"""
+
+    inputs: Any
+    """The input image data"""
+    parameters: Optional[Dict[str, Any]] = None
+    """Additional inference parameters"""
+
+
+@dataclass
+class DepthEstimationOutput(BaseInferenceType):
+    """Outputs of inference for the Depth Estimation task"""
+
+    depth: Any
+    """The predicted depth as an image"""
+    predicted_depth: Any
+    """The predicted depth as a tensor"""

huggingface_hub/inference/_generated/types/document_question_answering.py

@@ -0,0 +1,85 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, List, Optional, Union
+
+from .base import BaseInferenceType
+
+
+@dataclass
+class DocumentQuestionAnsweringInputData(BaseInferenceType):
+    """One (document, question) pair to answer"""
+
+    image: Any
+    """The image on which the question is asked"""
+    question: str
+    """A question to ask of the document"""
+
+
+@dataclass
+class DocumentQuestionAnsweringParameters(BaseInferenceType):
+    """Additional inference parameters
+    Additional inference parameters for Document Question Answering
+    """
+
+    doc_stride: Optional[int] = None
+    """If the words in the document are too long to fit with the question for the model, it will
+    be split in several chunks with some overlap. This argument controls the size of that
+    overlap.
+    """
+    handle_impossible_answer: Optional[bool] = None
+    """Whether to accept impossible as an answer"""
+    lang: Optional[str] = None
+    """Language to use while running OCR. Defaults to english."""
+    max_answer_len: Optional[int] = None
+    """The maximum length of predicted answers (e.g., only answers with a shorter length are
+    considered).
+    """
+    max_question_len: Optional[int] = None
+    """The maximum length of the question after tokenization. It will be truncated if needed."""
+    max_seq_len: Optional[int] = None
+    """The maximum length of the total sentence (context + question) in tokens of each chunk
+    passed to the model. The context will be split in several chunks (using doc_stride as
+    overlap) if needed.
+    """
+    top_k: Optional[int] = None
+    """The number of answers to return (will be chosen by order of likelihood). Can return less
+    than top_k answers if there are not enough options available within the context.
+    """
+    word_boxes: Optional[List[Union[List[float], str]]] = None
+    """A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
+    skip the OCR step and use the provided bounding boxes instead.
+    """
+
+
+@dataclass
+class DocumentQuestionAnsweringInput(BaseInferenceType):
+    """Inputs for Document Question Answering inference"""
+
+    inputs: DocumentQuestionAnsweringInputData
+    """One (document, question) pair to answer"""
+    parameters: Optional[DocumentQuestionAnsweringParameters] = None
+    """Additional inference parameters"""
+
+
+@dataclass
+class DocumentQuestionAnsweringOutputElement(BaseInferenceType):
+    """Outputs of inference for the Document Question Answering task"""
+
+    answer: str
+    """The answer to the question."""
+    end: int
+    """The end word index of the answer (in the OCR’d version of the input or provided word
+    boxes).
+    """
+    score: float
+    """The probability associated to the answer."""
+    start: int
+    """The start word index of the answer (in the OCR’d version of the input or provided word
+    boxes).
+    """
+    words: List[int]
+    """The index of each word/box pair that is in the answer"""