huggingface-hub 0.30.1__py3-none-any.whl → 0.31.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +1 -1
- huggingface_hub/_commit_api.py +23 -4
- huggingface_hub/_inference_endpoints.py +8 -5
- huggingface_hub/_snapshot_download.py +2 -1
- huggingface_hub/_space_api.py +0 -5
- huggingface_hub/_upload_large_folder.py +26 -3
- huggingface_hub/commands/upload.py +2 -1
- huggingface_hub/constants.py +1 -0
- huggingface_hub/file_download.py +58 -10
- huggingface_hub/hf_api.py +81 -15
- huggingface_hub/inference/_client.py +105 -150
- huggingface_hub/inference/_generated/_async_client.py +105 -150
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +2 -3
- huggingface_hub/inference/_generated/types/chat_completion.py +3 -3
- huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
- huggingface_hub/inference/_generated/types/text_generation.py +1 -1
- huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
- huggingface_hub/inference/_providers/__init__.py +55 -17
- huggingface_hub/inference/_providers/_common.py +34 -19
- huggingface_hub/inference/_providers/black_forest_labs.py +4 -1
- huggingface_hub/inference/_providers/fal_ai.py +36 -11
- huggingface_hub/inference/_providers/hf_inference.py +33 -11
- huggingface_hub/inference/_providers/hyperbolic.py +5 -1
- huggingface_hub/inference/_providers/nebius.py +15 -1
- huggingface_hub/inference/_providers/novita.py +14 -1
- huggingface_hub/inference/_providers/openai.py +3 -2
- huggingface_hub/inference/_providers/replicate.py +22 -3
- huggingface_hub/inference/_providers/sambanova.py +23 -1
- huggingface_hub/inference/_providers/together.py +15 -1
- huggingface_hub/repocard_data.py +24 -4
- huggingface_hub/utils/_pagination.py +2 -2
- huggingface_hub/utils/_runtime.py +4 -0
- huggingface_hub/utils/_xet.py +1 -12
- {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/METADATA +3 -2
- {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/RECORD +40 -40
- {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_generated/types/automatic_speech_recognition.py (+2 -3)

```diff
@@ -75,11 +75,10 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
 class AutomaticSpeechRecognitionParameters(BaseInferenceType):
     """Additional inference parameters for Automatic Speech Recognition"""

+    generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
+    """Parametrization of the text generation process"""
     return_timestamps: Optional[bool] = None
     """Whether to output corresponding timestamps with the generated text"""
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
-    """Parametrization of the text generation process"""


 @dataclass_with_extra
```
huggingface_hub/inference/_generated/types/chat_completion.py (+3 -3)

```diff
@@ -25,8 +25,8 @@ class ChatCompletionInputMessageChunk(BaseInferenceType):

 @dataclass_with_extra
 class ChatCompletionInputFunctionDefinition(BaseInferenceType):
-    arguments: Any
     name: str
+    parameters: Any
     description: Optional[str] = None


@@ -45,7 +45,7 @@ class ChatCompletionInputMessage(BaseInferenceType):
     tool_calls: Optional[List[ChatCompletionInputToolCall]] = None


-ChatCompletionInputGrammarTypeType = Literal["json", "regex"]
+ChatCompletionInputGrammarTypeType = Literal["json", "regex", "json_schema"]


 @dataclass_with_extra
@@ -189,7 +189,7 @@ class ChatCompletionOutputLogprobs(BaseInferenceType):

 @dataclass_with_extra
 class ChatCompletionOutputFunctionDefinition(BaseInferenceType):
-    arguments: Any
+    arguments: str
     name: str
     description: Optional[str] = None

```
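Two breaking tweaks here: the tool-definition input renames `arguments` to `parameters` (matching the OpenAI-style function schema) and `"json_schema"` joins the accepted grammar types, while the tool-call output's `arguments` becomes a plain string. A minimal, illustrative sketch of a tool-calling request against the new field names; the client setup and model id are assumptions, not part of the diff:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()  # any provider/model with tool-calling support
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city",
            # 0.30.x exposed this field as `arguments`; it is now `parameters` (a JSON schema)
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]
response = client.chat_completion(
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    tool_choice="auto",
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
)
# On the output side, each tool call's `function.arguments` is now typed as `str` (JSON text).
print(response.choices[0].message.tool_calls)
```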
huggingface_hub/inference/_generated/types/image_to_text.py (+2 -3)

```diff
@@ -75,11 +75,10 @@ class ImageToTextGenerationParameters(BaseInferenceType):
 class ImageToTextParameters(BaseInferenceType):
     """Additional inference parameters for Image To Text"""

+    generation_parameters: Optional[ImageToTextGenerationParameters] = None
+    """Parametrization of the text generation process"""
     max_new_tokens: Optional[int] = None
     """The amount of maximum tokens to generate."""
-    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
-    generate_kwargs: Optional[ImageToTextGenerationParameters] = None
-    """Parametrization of the text generation process"""


 @dataclass_with_extra
```
huggingface_hub/inference/_generated/types/text_to_audio.py (+1 -2)

```diff
@@ -75,8 +75,7 @@ class TextToAudioGenerationParameters(BaseInferenceType):
 class TextToAudioParameters(BaseInferenceType):
     """Additional inference parameters for Text To Audio"""

-
-    generate_kwargs: Optional[TextToAudioGenerationParameters] = None
+    generation_parameters: Optional[TextToAudioGenerationParameters] = None
     """Parametrization of the text generation process"""

```
huggingface_hub/inference/_generated/types/text_to_speech.py (+1 -2)

```diff
@@ -75,8 +75,7 @@ class TextToSpeechGenerationParameters(BaseInferenceType):
 class TextToSpeechParameters(BaseInferenceType):
     """Additional inference parameters for Text To Speech"""

-
-    generate_kwargs: Optional[TextToSpeechGenerationParameters] = None
+    generation_parameters: Optional[TextToSpeechGenerationParameters] = None
     """Parametrization of the text generation process"""

```
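The four type files above all retire the deprecated `generate_kwargs` field in favor of `generation_parameters`. A small sketch of constructing the renamed field directly on the ASR parameters dataclass; only the class and field names come from the diff, the values are arbitrary:

```python
from huggingface_hub.inference._generated.types import (
    AutomaticSpeechRecognitionGenerationParameters,
    AutomaticSpeechRecognitionParameters,
)

# 0.30.x exposed this as `generate_kwargs`; 0.31.0 renames the field to `generation_parameters`.
params = AutomaticSpeechRecognitionParameters(
    return_timestamps=True,
    generation_parameters=AutomaticSpeechRecognitionGenerationParameters(
        max_new_tokens=128,
        temperature=0.2,
    ),
)
print(params.generation_parameters)
```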
huggingface_hub/inference/_providers/__init__.py (+55 -17)

```diff
@@ -1,6 +1,8 @@
-from typing import Dict, Literal
+from typing import Dict, Literal, Optional, Union

-from ._common import TaskProviderHelper
+from huggingface_hub.utils import logging
+
+from ._common import TaskProviderHelper, _fetch_inference_provider_mapping
 from .black_forest_labs import BlackForestLabsTextToImageTask
 from .cerebras import CerebrasConversationalTask
 from .cohere import CohereConversationalTask
@@ -11,16 +13,24 @@ from .fal_ai import (
     FalAITextToVideoTask,
 )
 from .fireworks_ai import FireworksAIConversationalTask
-from .hf_inference import HFInferenceBinaryInputTask, HFInferenceConversational, HFInferenceTask
+from .hf_inference import (
+    HFInferenceBinaryInputTask,
+    HFInferenceConversational,
+    HFInferenceFeatureExtractionTask,
+    HFInferenceTask,
+)
 from .hyperbolic import HyperbolicTextGenerationTask, HyperbolicTextToImageTask
 from .nebius import NebiusConversationalTask, NebiusTextGenerationTask, NebiusTextToImageTask
 from .novita import NovitaConversationalTask, NovitaTextGenerationTask, NovitaTextToVideoTask
 from .openai import OpenAIConversationalTask
-from .replicate import ReplicateTask, ReplicateTextToSpeechTask
-from .sambanova import SambanovaConversationalTask
+from .replicate import ReplicateTask, ReplicateTextToImageTask, ReplicateTextToSpeechTask
+from .sambanova import SambanovaConversationalTask, SambanovaFeatureExtractionTask
 from .together import TogetherConversationalTask, TogetherTextGenerationTask, TogetherTextToImageTask


+logger = logging.get_logger(__name__)
+
+
 PROVIDER_T = Literal[
     "black-forest-labs",
     "cerebras",
@@ -37,6 +47,8 @@ PROVIDER_T = Literal[
     "together",
 ]

+PROVIDER_OR_POLICY_T = Union[PROVIDER_T, Literal["auto"]]
+
 PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
     "black-forest-labs": {
         "text-to-image": BlackForestLabsTextToImageTask(),
@@ -65,7 +77,7 @@ PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
         "audio-classification": HFInferenceBinaryInputTask("audio-classification"),
         "automatic-speech-recognition": HFInferenceBinaryInputTask("automatic-speech-recognition"),
         "fill-mask": HFInferenceTask("fill-mask"),
-        "feature-extraction": HFInferenceTask("feature-extraction"),
+        "feature-extraction": HFInferenceFeatureExtractionTask(),
         "image-classification": HFInferenceBinaryInputTask("image-classification"),
         "image-segmentation": HFInferenceBinaryInputTask("image-segmentation"),
         "document-question-answering": HFInferenceTask("document-question-answering"),
@@ -103,12 +115,13 @@ PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
         "conversational": OpenAIConversationalTask(),
     },
     "replicate": {
-        "text-to-image": ReplicateTask("text-to-image"),
+        "text-to-image": ReplicateTextToImageTask(),
         "text-to-speech": ReplicateTextToSpeechTask(),
         "text-to-video": ReplicateTask("text-to-video"),
     },
     "sambanova": {
         "conversational": SambanovaConversationalTask(),
+        "feature-extraction": SambanovaFeatureExtractionTask(),
     },
     "together": {
         "text-to-image": TogetherTextToImageTask(),
@@ -118,24 +131,49 @@ PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
 }


-def get_provider_helper(
+def get_provider_helper(
+    provider: Optional[PROVIDER_OR_POLICY_T], task: str, model: Optional[str]
+) -> TaskProviderHelper:
     """Get provider helper instance by name and task.

     Args:
-        provider (str):
-        task (str): Name of the task
-
+        provider (`str`, *optional*): name of the provider, or "auto" to automatically select the provider for the model.
+        task (`str`): Name of the task
+        model (`str`, *optional*): Name of the model
     Returns:
         TaskProviderHelper: Helper instance for the specified provider and task

     Raises:
         ValueError: If provider or task is not supported
     """
-
-
-
+
+    if (model is None and provider in (None, "auto")) or (
+        model is not None and model.startswith(("http://", "https://"))
+    ):
+        provider = "hf-inference"
+
+    if provider is None:
+        logger.info(
+            "Defaulting to 'auto' which will select the first provider available for the model, sorted by the user's order in https://hf.co/settings/inference-providers."
+        )
+        provider = "auto"
+
+    if provider == "auto":
+        if model is None:
+            raise ValueError("Specifying a model is required when provider is 'auto'")
+        provider_mapping = _fetch_inference_provider_mapping(model)
+        provider = next(iter(provider_mapping))
+
+    provider_tasks = PROVIDERS.get(provider)  # type: ignore
+    if provider_tasks is None:
+        raise ValueError(
+            f"Provider '{provider}' not supported. Available values: 'auto' or any provider from {list(PROVIDERS.keys())}."
+            "Passing 'auto' (default value) will automatically select the first provider available for the model, sorted "
+            "by the user's order in https://hf.co/settings/inference-providers."
+        )
+
+    if task not in provider_tasks:
         raise ValueError(
-            f"Task '{task}' not supported for provider '{provider}'. "
-            f"Available tasks: {list(PROVIDERS[provider].keys())}"
+            f"Task '{task}' not supported for provider '{provider}'. Available tasks: {list(provider_tasks.keys())}"
         )
-    return
+    return provider_tasks[task]
```
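`get_provider_helper` now accepts `provider=None` or `"auto"` and, for `"auto"`, resolves the first provider mapped to the model (ordered by the user's settings at https://hf.co/settings/inference-providers). From user code this is reached through `InferenceClient`; a sketch with an illustrative model id:

```python
from huggingface_hub import InferenceClient

# "auto" (also what provider=None falls back to) picks the first provider registered
# for the model, following the order configured in the user's Hub settings.
client = InferenceClient(provider="auto")
out = client.chat_completion(
    messages=[{"role": "user", "content": "Say hello"}],
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative; any model with a provider mapping
)
print(out.choices[0].message.content)
```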
huggingface_hub/inference/_providers/_common.py (+34 -19)

```diff
@@ -2,21 +2,24 @@ from functools import lru_cache
 from typing import Any, Dict, Optional, Union

 from huggingface_hub import constants
+from huggingface_hub.hf_api import InferenceProviderMapping
 from huggingface_hub.inference._common import RequestParameters
 from huggingface_hub.utils import build_hf_headers, get_token, logging


 logger = logging.get_logger(__name__)

-
 # Dev purposes only.
 # If you want to try to run inference for a new model locally before it's registered on huggingface.co
 # for a given Inference Provider, you can add it to the following dictionary.
-
-    # "HF model ID" => "Model ID on Inference Provider's side"
+HARDCODED_MODEL_INFERENCE_MAPPING: Dict[str, Dict[str, InferenceProviderMapping]] = {
+    # "HF model ID" => InferenceProviderMapping object initialized with "Model ID on Inference Provider's side"
     #
     # Example:
-    # "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
+    # "Qwen/Qwen2.5-Coder-32B-Instruct": InferenceProviderMapping(hf_model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+    #                                                             provider_id="Qwen2.5-Coder-32B-Instruct",
+    #                                                             task="conversational",
+    #                                                             status="live")
     "cerebras": {},
     "cohere": {},
     "fal-ai": {},
@@ -61,28 +64,30 @@ class TaskProviderHelper:
         api_key = self._prepare_api_key(api_key)

         # mapped model from HF model ID
-
+        provider_mapping_info = self._prepare_mapping_info(model)

         # default HF headers + user headers (to customize in subclasses)
         headers = self._prepare_headers(headers, api_key)

         # routed URL if HF token, or direct URL (to customize in '_prepare_route' in subclasses)
-        url = self._prepare_url(api_key,
+        url = self._prepare_url(api_key, provider_mapping_info.provider_id)

         # prepare payload (to customize in subclasses)
-        payload = self._prepare_payload_as_dict(inputs, parameters,
+        payload = self._prepare_payload_as_dict(inputs, parameters, provider_mapping_info=provider_mapping_info)
         if payload is not None:
             payload = recursive_merge(payload, extra_payload or {})

         # body data (to customize in subclasses)
-        data = self._prepare_payload_as_bytes(inputs, parameters,
+        data = self._prepare_payload_as_bytes(inputs, parameters, provider_mapping_info, extra_payload)

         # check if both payload and data are set and return
         if payload is not None and data is not None:
             raise ValueError("Both payload and data cannot be set in the same request.")
         if payload is None and data is None:
             raise ValueError("Either payload or data must be set in the request.")
-        return RequestParameters(
+        return RequestParameters(
+            url=url, task=self.task, model=provider_mapping_info.provider_id, json=payload, data=data, headers=headers
+        )

     def get_response(
         self,
@@ -107,7 +112,7 @@ class TaskProviderHelper:
             )
         return api_key

-    def
+    def _prepare_mapping_info(self, model: Optional[str]) -> InferenceProviderMapping:
         """Return the mapped model ID to use for the request.

         Usually not overwritten in subclasses."""
@@ -115,8 +120,8 @@ class TaskProviderHelper:
             raise ValueError(f"Please provide an HF model ID supported by {self.provider}.")

         # hardcoded mapping for local testing
-        if
-            return
+        if HARDCODED_MODEL_INFERENCE_MAPPING.get(self.provider, {}).get(model):
+            return HARDCODED_MODEL_INFERENCE_MAPPING[self.provider][model]

         provider_mapping = _fetch_inference_provider_mapping(model).get(self.provider)
         if provider_mapping is None:
@@ -132,7 +137,7 @@ class TaskProviderHelper:
             logger.warning(
                 f"Model {model} is in staging mode for provider {self.provider}. Meant for test purposes only."
             )
-        return provider_mapping
+        return provider_mapping

     def _prepare_headers(self, headers: Dict, api_key: str) -> Dict:
         """Return the headers to use for the request.
@@ -168,7 +173,9 @@ class TaskProviderHelper:
         """
         return ""

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
         """Return the payload to use for the request, as a dict.

         Override this method in subclasses for customized payloads.
@@ -177,7 +184,11 @@ class TaskProviderHelper:
         return None

     def _prepare_payload_as_bytes(
-        self,
+        self,
+        inputs: Any,
+        parameters: Dict,
+        provider_mapping_info: InferenceProviderMapping,
+        extra_payload: Optional[Dict],
     ) -> Optional[bytes]:
         """Return the body to use for the request, as bytes.

@@ -199,8 +210,10 @@ class BaseConversationalTask(TaskProviderHelper):
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/v1/chat/completions"

-    def _prepare_payload_as_dict(
-
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        return {"messages": inputs, **filter_none(parameters), "model": provider_mapping_info.provider_id}


 class BaseTextGenerationTask(TaskProviderHelper):
@@ -215,8 +228,10 @@ class BaseTextGenerationTask(TaskProviderHelper):
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/v1/completions"

-    def _prepare_payload_as_dict(
-
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        return {"prompt": inputs, **filter_none(parameters), "model": provider_mapping_info.provider_id}


 @lru_cache(maxsize=None)
```
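The dev-only mapping now stores `InferenceProviderMapping` objects rather than bare provider-side model IDs, and every helper receives the whole mapping (`provider_mapping_info`) instead of a `mapped_model` string. A hedged sketch of registering a not-yet-published mapping locally; the `"together"` key and the `providerId=` kwarg spelling are assumptions taken from elsewhere in this diff (the example comment above and the hf_inference.py hunk below), not verified against the released API:

```python
from huggingface_hub.hf_api import InferenceProviderMapping
from huggingface_hub.inference._providers._common import HARDCODED_MODEL_INFERENCE_MAPPING

# Dev purposes only: pre-register a model that is not yet mapped on huggingface.co.
# `providerId=` mirrors the constructor usage shown in the hf_inference.py hunk further down;
# the "together" sub-dict is assumed to exist alongside "cerebras", "cohere", "fal-ai", etc.
HARDCODED_MODEL_INFERENCE_MAPPING["together"]["Qwen/Qwen2.5-Coder-32B-Instruct"] = InferenceProviderMapping(
    providerId="Qwen2.5-Coder-32B-Instruct",
    hf_model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    task="conversational",
    status="live",
)
```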
huggingface_hub/inference/_providers/black_forest_labs.py (+4 -1)

```diff
@@ -1,6 +1,7 @@
 import time
 from typing import Any, Dict, Optional, Union

+from huggingface_hub.hf_api import InferenceProviderMapping
 from huggingface_hub.inference._common import RequestParameters, _as_dict
 from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
 from huggingface_hub.utils import logging
@@ -27,7 +28,9 @@ class BlackForestLabsTextToImageTask(TaskProviderHelper):
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return f"/v1/{mapped_model}"

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
         parameters = filter_none(parameters)
         if "num_inference_steps" in parameters:
             parameters["steps"] = parameters.pop("num_inference_steps")
```
huggingface_hub/inference/_providers/fal_ai.py (+36 -11)

```diff
@@ -4,6 +4,8 @@ from abc import ABC
 from typing import Any, Dict, Optional, Union
 from urllib.parse import urlparse

+from huggingface_hub import constants
+from huggingface_hub.hf_api import InferenceProviderMapping
 from huggingface_hub.inference._common import RequestParameters, _as_dict
 from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
 from huggingface_hub.utils import get_session, hf_raise_for_status
@@ -34,7 +36,9 @@ class FalAIAutomaticSpeechRecognitionTask(FalAITask):
     def __init__(self):
         super().__init__("automatic-speech-recognition")

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
         if isinstance(inputs, str) and inputs.startswith(("http://", "https://")):
             # If input is a URL, pass it directly
             audio_url = inputs
@@ -61,14 +65,31 @@ class FalAITextToImageTask(FalAITask):
     def __init__(self):
         super().__init__("text-to-image")

-    def _prepare_payload_as_dict(
-        parameters
-
-
-
-
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        payload: Dict[str, Any] = {
+            "prompt": inputs,
+            **filter_none(parameters),
+        }
+        if "width" in payload and "height" in payload:
+            payload["image_size"] = {
+                "width": payload.pop("width"),
+                "height": payload.pop("height"),
             }
-
+        if provider_mapping_info.adapter_weights_path is not None:
+            lora_path = constants.HUGGINGFACE_CO_URL_TEMPLATE.format(
+                repo_id=provider_mapping_info.hf_model_id,
+                revision="main",
+                filename=provider_mapping_info.adapter_weights_path,
+            )
+            payload["loras"] = [{"path": lora_path, "scale": 1}]
+            if provider_mapping_info.provider_id == "fal-ai/lora":
+                # little hack: fal requires the base model for stable-diffusion-based loras but not for flux-based
+                # See payloads in https://fal.ai/models/fal-ai/lora/api vs https://fal.ai/models/fal-ai/flux-lora/api
+                payload["model_name"] = "stabilityai/stable-diffusion-xl-base-1.0"
+
+        return payload

     def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
         url = _as_dict(response)["images"][0]["url"]
@@ -79,8 +100,10 @@ class FalAITextToSpeechTask(FalAITask):
     def __init__(self):
         super().__init__("text-to-speech")

-    def _prepare_payload_as_dict(
-
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        return {"text": inputs, **filter_none(parameters)}

     def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
         url = _as_dict(response)["audio"]["url"]
@@ -104,7 +127,9 @@ class FalAITextToVideoTask(FalAITask):
             return f"/{mapped_model}?_subdomain=queue"
         return f"/{mapped_model}"

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
         return {"prompt": inputs, **filter_none(parameters)}

     def get_response(
```
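The fal.ai text-to-image task gains LoRA support: when the provider mapping carries an `adapter_weights_path`, the payload now includes a `loras` entry pointing at the adapter file on the Hub (and, for the `fal-ai/lora` endpoint, a Stable Diffusion XL base `model_name`). A hedged usage sketch; the LoRA repo id below is purely illustrative:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai")
# For a LoRA adapter repo mapped to fal-ai, the request payload now carries
# payload["loras"] = [{"path": "<resolved Hub file URL>", "scale": 1}].
image = client.text_to_image(
    "An astronaut riding a horse, watercolor style",
    model="someuser/some-flux-lora",  # illustrative adapter repo id, not from the diff
)
image.save("astronaut.png")  # text_to_image returns a PIL.Image, so Pillow is required
```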
huggingface_hub/inference/_providers/hf_inference.py (+33 -11)

```diff
@@ -1,10 +1,11 @@
 import json
 from functools import lru_cache
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union

 from huggingface_hub import constants
-from huggingface_hub.
+from huggingface_hub.hf_api import InferenceProviderMapping
+from huggingface_hub.inference._common import RequestParameters, _b64_encode, _bytes_to_dict, _open_as_binary
 from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none
 from huggingface_hub.utils import build_hf_headers, get_session, get_token, hf_raise_for_status

@@ -23,9 +24,9 @@ class HFInferenceTask(TaskProviderHelper):
         # special case: for HF Inference we allow not providing an API key
         return api_key or get_token()  # type: ignore[return-value]

-    def
+    def _prepare_mapping_info(self, model: Optional[str]) -> InferenceProviderMapping:
         if model is not None and model.startswith(("http://", "https://")):
-            return model
+            return InferenceProviderMapping(providerId=model, hf_model_id=model, task=self.task, status="live")
         model_id = model if model is not None else _fetch_recommended_models().get(self.task)
         if model_id is None:
             raise ValueError(
@@ -33,7 +34,7 @@ class HFInferenceTask(TaskProviderHelper):
                 " explicitly. Visit https://huggingface.co/tasks for more info."
             )
         _check_supported_task(model_id, self.task)
-        return model_id
+        return InferenceProviderMapping(providerId=model_id, hf_model_id=model_id, task=self.task, status="live")

     def _prepare_url(self, api_key: str, mapped_model: str) -> str:
         # hf-inference provider can handle URLs (e.g. Inference Endpoints or TGI deployment)
@@ -41,13 +42,15 @@ class HFInferenceTask(TaskProviderHelper):
             return mapped_model
         return (
             # Feature-extraction and sentence-similarity are the only cases where we handle models with several tasks.
-            f"{self.base_url}/pipeline/{self.task}
+            f"{self.base_url}/models/{mapped_model}/pipeline/{self.task}"
             if self.task in ("feature-extraction", "sentence-similarity")
             # Otherwise, we use the default endpoint
             else f"{self.base_url}/models/{mapped_model}"
         )

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
         if isinstance(inputs, bytes):
             raise ValueError(f"Unexpected binary input for task {self.task}.")
         if isinstance(inputs, Path):
@@ -56,11 +59,17 @@ class HFInferenceTask(TaskProviderHelper):


 class HFInferenceBinaryInputTask(HFInferenceTask):
-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
         return None

     def _prepare_payload_as_bytes(
-        self,
+        self,
+        inputs: Any,
+        parameters: Dict,
+        provider_mapping_info: InferenceProviderMapping,
+        extra_payload: Optional[Dict],
     ) -> Optional[bytes]:
         parameters = filter_none({k: v for k, v in parameters.items() if v is not None})
         extra_payload = extra_payload or {}
@@ -82,9 +91,12 @@ class HFInferenceBinaryInputTask(HFInferenceTask):

 class HFInferenceConversational(HFInferenceTask):
     def __init__(self):
-        super().__init__("
+        super().__init__("conversational")

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        mapped_model = provider_mapping_info.provider_id
         payload_model = parameters.get("model") or mapped_model

         if payload_model is None or payload_model.startswith(("http://", "https://")):
@@ -165,3 +177,13 @@ def _check_supported_task(model: str, task: str) -> None:
             f"Model '{model}' doesn't support task '{task}'. Supported tasks: '{pipeline_tag}', got: '{task}'"
         )
     return
+
+
+class HFInferenceFeatureExtractionTask(HFInferenceTask):
+    def __init__(self):
+        super().__init__("feature-extraction")
+
+    def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
+        if isinstance(response, bytes):
+            return _bytes_to_dict(response)
+        return response
```
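Two user-visible effects for the hf-inference provider: feature-extraction and sentence-similarity requests are now routed to `{base_url}/models/{model}/pipeline/{task}`, and the new `HFInferenceFeatureExtractionTask` decodes raw byte responses before returning them. Roughly, from the client side (model id illustrative):

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="hf-inference")
embedding = client.feature_extraction(
    "Today is a sunny day",
    model="sentence-transformers/all-MiniLM-L6-v2",  # illustrative embedding model
)
print(embedding.shape)  # numpy array; raw bytes responses are now JSON-decoded by the helper
```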
huggingface_hub/inference/_providers/hyperbolic.py (+5 -1)

```diff
@@ -1,6 +1,7 @@
 import base64
 from typing import Any, Dict, Optional, Union

+from huggingface_hub.hf_api import InferenceProviderMapping
 from huggingface_hub.inference._common import RequestParameters, _as_dict
 from huggingface_hub.inference._providers._common import BaseConversationalTask, TaskProviderHelper, filter_none

@@ -12,7 +13,10 @@ class HyperbolicTextToImageTask(TaskProviderHelper):
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/v1/images/generations"

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        mapped_model = provider_mapping_info.provider_id
         parameters = filter_none(parameters)
         if "num_inference_steps" in parameters:
             parameters["steps"] = parameters.pop("num_inference_steps")
```
huggingface_hub/inference/_providers/nebius.py (+15 -1)

```diff
@@ -1,6 +1,7 @@
 import base64
 from typing import Any, Dict, Optional, Union

+from huggingface_hub.hf_api import InferenceProviderMapping
 from huggingface_hub.inference._common import RequestParameters, _as_dict
 from huggingface_hub.inference._providers._common import (
     BaseConversationalTask,
@@ -14,6 +15,16 @@ class NebiusTextGenerationTask(BaseTextGenerationTask):
     def __init__(self):
         super().__init__(provider="nebius", base_url="https://api.studio.nebius.ai")

+    def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
+        output = _as_dict(response)["choices"][0]
+        return {
+            "generated_text": output["text"],
+            "details": {
+                "finish_reason": output.get("finish_reason"),
+                "seed": output.get("seed"),
+            },
+        }
+

 class NebiusConversationalTask(BaseConversationalTask):
     def __init__(self):
@@ -27,7 +38,10 @@ class NebiusTextToImageTask(TaskProviderHelper):
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return "/v1/images/generations"

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
+        mapped_model = provider_mapping_info.provider_id
         parameters = filter_none(parameters)
         if "guidance_scale" in parameters:
             parameters.pop("guidance_scale")
```
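`NebiusTextGenerationTask` (and `NovitaTextGenerationTask` in the next file) now adapts the provider's OpenAI-style completions response into the `text_generation` shape, lifting `choices[0].text` into `generated_text` and keeping `finish_reason`/`seed` under `details`. Illustrative usage (model id assumed):

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="nebius")
text = client.text_generation(
    "The capital of France is",
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    max_new_tokens=10,
)
# The provider returns {"choices": [{"text": ...}]}; the helper now remaps it to
# {"generated_text": ..., "details": {...}} before the client parses it.
print(text)
```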
huggingface_hub/inference/_providers/novita.py (+14 -1)

```diff
@@ -1,5 +1,6 @@
 from typing import Any, Dict, Optional, Union

+from huggingface_hub.hf_api import InferenceProviderMapping
 from huggingface_hub.inference._common import RequestParameters, _as_dict
 from huggingface_hub.inference._providers._common import (
     BaseConversationalTask,
@@ -22,6 +23,16 @@ class NovitaTextGenerationTask(BaseTextGenerationTask):
         # there is no v1/ route for novita
         return "/v3/openai/completions"

+    def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
+        output = _as_dict(response)["choices"][0]
+        return {
+            "generated_text": output["text"],
+            "details": {
+                "finish_reason": output.get("finish_reason"),
+                "seed": output.get("seed"),
+            },
+        }
+

 class NovitaConversationalTask(BaseConversationalTask):
     def __init__(self):
@@ -39,7 +50,9 @@ class NovitaTextToVideoTask(TaskProviderHelper):
     def _prepare_route(self, mapped_model: str, api_key: str) -> str:
         return f"/v3/hf/{mapped_model}"

-    def _prepare_payload_as_dict(
+    def _prepare_payload_as_dict(
+        self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
+    ) -> Optional[Dict]:
         return {"prompt": inputs, **filter_none(parameters)}

     def get_response(self, response: Union[bytes, Dict], request_params: Optional[RequestParameters] = None) -> Any:
```