huggingface-hub 0.27.0rc1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries, and is provided for informational purposes only.

Files changed:
- huggingface_hub/__init__.py +418 -12
- huggingface_hub/_commit_api.py +33 -4
- huggingface_hub/_inference_endpoints.py +8 -2
- huggingface_hub/_local_folder.py +14 -3
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/upload_large_folder.py +1 -1
- huggingface_hub/constants.py +7 -2
- huggingface_hub/file_download.py +1 -2
- huggingface_hub/hf_api.py +65 -84
- huggingface_hub/hub_mixin.py +12 -9
- huggingface_hub/inference/_client.py +706 -450
- huggingface_hub/inference/_common.py +32 -64
- huggingface_hub/inference/_generated/_async_client.py +722 -470
- huggingface_hub/inference/_generated/types/__init__.py +1 -0
- huggingface_hub/inference/_generated/types/image_to_image.py +3 -3
- huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_image.py +3 -3
- huggingface_hub/inference/_generated/types/text_to_speech.py +3 -6
- huggingface_hub/inference/_generated/types/text_to_video.py +47 -0
- huggingface_hub/inference/_generated/types/visual_question_answering.py +1 -1
- huggingface_hub/inference/_providers/__init__.py +89 -0
- huggingface_hub/inference/_providers/fal_ai.py +159 -0
- huggingface_hub/inference/_providers/hf_inference.py +202 -0
- huggingface_hub/inference/_providers/replicate.py +148 -0
- huggingface_hub/inference/_providers/sambanova.py +89 -0
- huggingface_hub/inference/_providers/together.py +153 -0
- huggingface_hub/py.typed +0 -0
- huggingface_hub/repocard.py +1 -1
- huggingface_hub/repocard_data.py +2 -1
- huggingface_hub/serialization/_base.py +1 -1
- huggingface_hub/serialization/_torch.py +1 -1
- huggingface_hub/utils/_fixes.py +25 -13
- huggingface_hub/utils/_http.py +3 -3
- huggingface_hub/utils/logging.py +1 -1
- {huggingface_hub-0.27.0rc1.dist-info → huggingface_hub-0.28.0.dist-info}/METADATA +4 -4
- {huggingface_hub-0.27.0rc1.dist-info → huggingface_hub-0.28.0.dist-info}/RECORD +40 -32
- {huggingface_hub-0.27.0rc1.dist-info → huggingface_hub-0.28.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.27.0rc1.dist-info → huggingface_hub-0.28.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.27.0rc1.dist-info → huggingface_hub-0.28.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.27.0rc1.dist-info → huggingface_hub-0.28.0.dist-info}/top_level.txt +0 -0
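
The headline change in 0.28.0 is multi-provider support in `InferenceClient`, backed by the new `huggingface_hub/inference/_providers/` package listed above. A minimal sketch of the new API, assuming a logged-in user or an explicit token (the prompt is illustrative; the model ID is one of the fal.ai mappings shown further below):

from huggingface_hub import InferenceClient

# Route requests through a third-party provider instead of the default
# "hf-inference". With an hf_ token, the call goes through the HF proxy.
client = InferenceClient(provider="fal-ai")

image = client.text_to_image(
    "An astronaut riding a horse",
    model="black-forest-labs/FLUX.1-schnell",
)
image.save("astronaut.png")  # text_to_image returns a PIL image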

huggingface_hub/inference/_generated/types/__init__.py

@@ -149,6 +149,7 @@ from .text_to_speech import (
     TextToSpeechOutput,
     TextToSpeechParameters,
 )
+from .text_to_video import TextToVideoInput, TextToVideoOutput, TextToVideoParameters
 from .token_classification import (
     TokenClassificationAggregationStrategy,
     TokenClassificationInput,
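
With this re-export in place, the new task types are importable alongside the other generated types:

from huggingface_hub.inference._generated.types import (
    TextToVideoInput,
    TextToVideoOutput,
    TextToVideoParameters,
)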

huggingface_hub/inference/_generated/types/image_to_image.py

@@ -4,7 +4,7 @@
 # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
 from dataclasses import dataclass
-from typing import Any, List, Optional
+from typing import Any, Optional
 
 from .base import BaseInferenceType
 
@@ -25,8 +25,8 @@ class ImageToImageParameters(BaseInferenceType):
     """For diffusion models. A higher guidance scale value encourages the model to generate
     images closely linked to the text prompt at the expense of lower image quality.
     """
-    negative_prompt: Optional[List[str]] = None
-    """One or several prompt to guide what NOT to include in image generation."""
+    negative_prompt: Optional[str] = None
+    """One prompt to guide what NOT to include in image generation."""
     num_inference_steps: Optional[int] = None
     """For diffusion models. The number of denoising steps. More denoising steps usually lead to
     a higher quality image at the expense of slower inference.

huggingface_hub/inference/_generated/types/text_to_audio.py

@@ -97,6 +97,5 @@ class TextToAudioOutput(BaseInferenceType):
 
     audio: Any
     """The generated audio waveform."""
-    sampling_rate: Any
-    text_to_audio_output_sampling_rate: Optional[float] = None
+    sampling_rate: float
     """The sampling rate of the generated audio waveform."""
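
`sampling_rate` is now a single required `float` instead of the previous duplicated field pair, so constructing the output type looks like this (values illustrative):

from huggingface_hub.inference._generated.types import TextToAudioOutput

output = TextToAudioOutput(audio=b"<waveform bytes>", sampling_rate=16_000.0)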

huggingface_hub/inference/_generated/types/text_to_image.py

@@ -4,7 +4,7 @@
 # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
 from dataclasses import dataclass
-from typing import Any, List, Optional
+from typing import Any, Optional
 
 from .base import BaseInferenceType
 
@@ -25,8 +25,8 @@ class TextToImageParameters(BaseInferenceType):
     """A higher guidance scale value encourages the model to generate images closely linked to
     the text prompt, but values too high may cause saturation and other artifacts.
     """
-    negative_prompt: Optional[List[str]] = None
-    """One or several prompt to guide what NOT to include in image generation."""
+    negative_prompt: Optional[str] = None
+    """One prompt to guide what NOT to include in image generation."""
     num_inference_steps: Optional[int] = None
     """The number of denoising steps. More denoising steps usually lead to a higher quality
     image at the expense of slower inference.

huggingface_hub/inference/_generated/types/text_to_speech.py

@@ -93,12 +93,9 @@ class TextToSpeechInput(BaseInferenceType):
 
 @dataclass
 class TextToSpeechOutput(BaseInferenceType):
-    """Outputs for Text to Speech inference
-    Outputs of inference for the Text To Audio task
-    """
+    """Outputs of inference for the Text To Speech task"""
 
     audio: Any
-    """The generated audio waveform"""
-    sampling_rate: Any
-    text_to_speech_output_sampling_rate: Optional[float] = None
+    """The generated audio"""
+    sampling_rate: Optional[float] = None
     """The sampling rate of the generated audio waveform."""

huggingface_hub/inference/_generated/types/text_to_video.py (new file)

@@ -0,0 +1,47 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, List, Optional
+
+from .base import BaseInferenceType
+
+
+@dataclass
+class TextToVideoParameters(BaseInferenceType):
+    """Additional inference parameters for Text To Video"""
+
+    guidance_scale: Optional[float] = None
+    """A higher guidance scale value encourages the model to generate videos closely linked to
+    the text prompt, but values too high may cause saturation and other artifacts.
+    """
+    negative_prompt: Optional[List[str]] = None
+    """One or several prompt to guide what NOT to include in video generation."""
+    num_frames: Optional[float] = None
+    """The num_frames parameter determines how many video frames are generated."""
+    num_inference_steps: Optional[int] = None
+    """The number of denoising steps. More denoising steps usually lead to a higher quality
+    video at the expense of slower inference.
+    """
+    seed: Optional[int] = None
+    """Seed for the random number generator."""
+
+
+@dataclass
+class TextToVideoInput(BaseInferenceType):
+    """Inputs for Text To Video inference"""
+
+    inputs: str
+    """The input text data (sometimes called "prompt")"""
+    parameters: Optional[TextToVideoParameters] = None
+    """Additional inference parameters for Text To Video"""
+
+
+@dataclass
+class TextToVideoOutput(BaseInferenceType):
+    """Outputs of inference for the Text To Video task"""
+
+    video: Any
+    """The generated video returned as raw bytes in the payload."""
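
These are plain dataclasses (via `BaseInferenceType`), so building a request payload is direct; a small illustrative sketch:

from huggingface_hub.inference._generated.types import (
    TextToVideoInput,
    TextToVideoParameters,
)

params = TextToVideoParameters(num_frames=16, num_inference_steps=25, seed=42)
request = TextToVideoInput(
    inputs="A timelapse of a city skyline at night",
    parameters=params,
)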

huggingface_hub/inference/_providers/__init__.py (new file)

@@ -0,0 +1,89 @@
+from typing import Dict, Literal
+
+from .._common import TaskProviderHelper
+from .fal_ai import FalAIAutomaticSpeechRecognitionTask, FalAITextToImageTask, FalAITextToVideoTask
+from .hf_inference import HFInferenceBinaryInputTask, HFInferenceConversational, HFInferenceTask
+from .replicate import ReplicateTask, ReplicateTextToSpeechTask
+from .sambanova import SambanovaConversationalTask
+from .together import TogetherTextGenerationTask, TogetherTextToImageTask
+
+
+PROVIDER_T = Literal[
+    "fal-ai",
+    "hf-inference",
+    "replicate",
+    "sambanova",
+    "together",
+]
+
+PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
+    "fal-ai": {
+        "text-to-image": FalAITextToImageTask(),
+        "automatic-speech-recognition": FalAIAutomaticSpeechRecognitionTask(),
+        "text-to-video": FalAITextToVideoTask(),
+    },
+    "hf-inference": {
+        "text-to-image": HFInferenceTask("text-to-image"),
+        "conversational": HFInferenceConversational(),
+        "text-generation": HFInferenceTask("text-generation"),
+        "text-classification": HFInferenceTask("text-classification"),
+        "question-answering": HFInferenceTask("question-answering"),
+        "audio-classification": HFInferenceBinaryInputTask("audio-classification"),
+        "automatic-speech-recognition": HFInferenceBinaryInputTask("automatic-speech-recognition"),
+        "fill-mask": HFInferenceTask("fill-mask"),
+        "feature-extraction": HFInferenceTask("feature-extraction"),
+        "image-classification": HFInferenceBinaryInputTask("image-classification"),
+        "image-segmentation": HFInferenceBinaryInputTask("image-segmentation"),
+        "document-question-answering": HFInferenceTask("document-question-answering"),
+        "image-to-text": HFInferenceTask("image-to-text"),
+        "object-detection": HFInferenceBinaryInputTask("object-detection"),
+        "audio-to-audio": HFInferenceTask("audio-to-audio"),
+        "zero-shot-image-classification": HFInferenceBinaryInputTask("zero-shot-image-classification"),
+        "zero-shot-classification": HFInferenceTask("zero-shot-classification"),
+        "image-to-image": HFInferenceBinaryInputTask("image-to-image"),
+        "sentence-similarity": HFInferenceTask("sentence-similarity"),
+        "table-question-answering": HFInferenceTask("table-question-answering"),
+        "tabular-classification": HFInferenceTask("tabular-classification"),
+        "text-to-speech": HFInferenceTask("text-to-speech"),
+        "token-classification": HFInferenceTask("token-classification"),
+        "translation": HFInferenceTask("translation"),
+        "summarization": HFInferenceTask("summarization"),
+        "visual-question-answering": HFInferenceBinaryInputTask("visual-question-answering"),
+    },
+    "replicate": {
+        "text-to-image": ReplicateTask("text-to-image"),
+        "text-to-speech": ReplicateTextToSpeechTask(),
+        "text-to-video": ReplicateTask("text-to-video"),
+    },
+    "sambanova": {
+        "conversational": SambanovaConversationalTask(),
+    },
+    "together": {
+        "text-to-image": TogetherTextToImageTask(),
+        "conversational": TogetherTextGenerationTask("conversational"),
+        "text-generation": TogetherTextGenerationTask("text-generation"),
+    },
+}
+
+
+def get_provider_helper(provider: PROVIDER_T, task: str) -> TaskProviderHelper:
+    """Get provider helper instance by name and task.
+
+    Args:
+        provider (str): Name of the provider
+        task (str): Name of the task
+
+    Returns:
+        TaskProviderHelper: Helper instance for the specified provider and task
+
+    Raises:
+        ValueError: If provider or task is not supported
+    """
+    if provider not in PROVIDERS:
+        raise ValueError(f"Provider '{provider}' not supported. Available providers: {list(PROVIDERS.keys())}")
+    if task not in PROVIDERS[provider]:
+        raise ValueError(
+            f"Task '{task}' not supported for provider '{provider}'. "
+            f"Available tasks: {list(PROVIDERS[provider].keys())}"
+        )
+    return PROVIDERS[provider][task]
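
The registry is keyed first by provider, then by task; a quick sketch of the lookup and its failure mode:

from huggingface_hub.inference._providers import get_provider_helper

helper = get_provider_helper("fal-ai", "text-to-video")  # -> FalAITextToVideoTask

try:
    get_provider_helper("fal-ai", "conversational")  # not in PROVIDERS["fal-ai"]
except ValueError as err:
    print(err)  # lists the tasks fal-ai does support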

huggingface_hub/inference/_providers/fal_ai.py (new file)

@@ -0,0 +1,159 @@
+import base64
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional, Union
+
+from huggingface_hub import constants
+from huggingface_hub.inference._common import RequestParameters, TaskProviderHelper, _as_dict
+from huggingface_hub.utils import build_hf_headers, get_session, get_token, logging
+
+
+logger = logging.get_logger(__name__)
+
+
+BASE_URL = "https://fal.run"
+
+SUPPORTED_MODELS = {
+    "automatic-speech-recognition": {
+        "openai/whisper-large-v3": "fal-ai/whisper",
+    },
+    "text-to-image": {
+        "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell",
+        "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev",
+        "playgroundai/playground-v2.5-1024px-aesthetic": "fal-ai/playground-v25",
+        "ByteDance/SDXL-Lightning": "fal-ai/lightning-models",
+        "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS": "fal-ai/pixart-sigma",
+        "stabilityai/stable-diffusion-3-medium": "fal-ai/stable-diffusion-v3-medium",
+        "Warlord-K/Sana-1024": "fal-ai/sana",
+        "fal/AuraFlow-v0.2": "fal-ai/aura-flow",
+        "stabilityai/stable-diffusion-3.5-large": "fal-ai/stable-diffusion-v35-large",
+        "Kwai-Kolors/Kolors": "fal-ai/kolors",
+    },
+    "text-to-video": {
+        "genmo/mochi-1-preview": "fal-ai/mochi-v1",
+        "tencent/HunyuanVideo": "fal-ai/hunyuan-video",
+    },
+}
+
+
+class FalAITask(TaskProviderHelper, ABC):
+    """Base class for FalAI API tasks."""
+
+    def __init__(self, task: str):
+        self.task = task
+
+    def prepare_request(
+        self,
+        *,
+        inputs: Any,
+        parameters: Dict[str, Any],
+        headers: Dict,
+        model: Optional[str],
+        api_key: Optional[str],
+        extra_payload: Optional[Dict[str, Any]] = None,
+    ) -> RequestParameters:
+        if api_key is None:
+            api_key = get_token()
+        if api_key is None:
+            raise ValueError(
+                "You must provide an api_key to work with fal.ai API or log in with `huggingface-cli login`."
+            )
+
+        mapped_model = self._map_model(model)
+        headers = {
+            **build_hf_headers(token=api_key),
+            **headers,
+        }
+
+        # Route to the proxy if the api_key is a HF TOKEN
+        if api_key.startswith("hf_"):
+            base_url = constants.INFERENCE_PROXY_TEMPLATE.format(provider="fal-ai")
+            logger.info("Calling fal.ai provider through Hugging Face proxy.")
+        else:
+            base_url = BASE_URL
+            headers["authorization"] = f"Key {api_key}"
+            logger.info("Calling fal.ai provider directly.")
+
+        payload = self._prepare_payload(inputs, parameters=parameters)
+
+        return RequestParameters(
+            url=f"{base_url}/{mapped_model}",
+            task=self.task,
+            model=mapped_model,
+            json=payload,
+            data=None,
+            headers=headers,
+        )
+
+    def _map_model(self, model: Optional[str]) -> str:
+        if model is None:
+            raise ValueError("Please provide a model available on FalAI.")
+        if self.task not in SUPPORTED_MODELS:
+            raise ValueError(f"Task {self.task} not supported with FalAI.")
+        mapped_model = SUPPORTED_MODELS[self.task].get(model)
+        if mapped_model is None:
+            raise ValueError(f"Model {model} is not supported with FalAI for task {self.task}.")
+        return mapped_model
+
+    @abstractmethod
+    def _prepare_payload(self, inputs: Any, parameters: Dict[str, Any]) -> Dict[str, Any]: ...
+
+
+class FalAIAutomaticSpeechRecognitionTask(FalAITask):
+    def __init__(self):
+        super().__init__("automatic-speech-recognition")
+
+    def _prepare_payload(self, inputs: Any, parameters: Dict[str, Any]) -> Dict[str, Any]:
+        if isinstance(inputs, str) and inputs.startswith(("http://", "https://")):
+            # If input is a URL, pass it directly
+            audio_url = inputs
+        else:
+            # If input is a file path, read it first
+            if isinstance(inputs, str):
+                with open(inputs, "rb") as f:
+                    inputs = f.read()
+
+            audio_b64 = base64.b64encode(inputs).decode()
+            content_type = "audio/mpeg"
+            audio_url = f"data:{content_type};base64,{audio_b64}"
+
+        return {
+            "audio_url": audio_url,
+            **{k: v for k, v in parameters.items() if v is not None},
+        }
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        text = _as_dict(response)["text"]
+        if not isinstance(text, str):
+            raise ValueError(f"Unexpected output format from FalAI API. Expected string, got {type(text)}.")
+        return text
+
+
+class FalAITextToImageTask(FalAITask):
+    def __init__(self):
+        super().__init__("text-to-image")
+
+    def _prepare_payload(self, inputs: Any, parameters: Dict[str, Any]) -> Dict[str, Any]:
+        parameters = {k: v for k, v in parameters.items() if v is not None}
+        if "image_size" not in parameters and "width" in parameters and "height" in parameters:
+            parameters["image_size"] = {
+                "width": parameters.pop("width"),
+                "height": parameters.pop("height"),
+            }
+        return {"prompt": inputs, **parameters}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        url = _as_dict(response)["images"][0]["url"]
+        return get_session().get(url).content
+
+
+class FalAITextToVideoTask(FalAITask):
+    def __init__(self):
+        super().__init__("text-to-video")
+
+    def _prepare_payload(self, inputs: Any, parameters: Dict[str, Any]) -> Dict[str, Any]:
+        parameters = {k: v for k, v in parameters.items() if v is not None}
+        return {"prompt": inputs, **parameters}
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        url = _as_dict(response)["video"]["url"]
+        return get_session().get(url).content
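
A sketch of how a helper is driven: `prepare_request` validates the key, maps the Hub model ID to a fal.ai endpoint, and builds the JSON payload, with no network call involved (the token below is a placeholder):

from huggingface_hub.inference._providers.fal_ai import FalAITextToVideoTask

helper = FalAITextToVideoTask()
request = helper.prepare_request(
    inputs="A timelapse of a city skyline at night",
    parameters={"num_frames": 16, "seed": 42},
    headers={},
    model="genmo/mochi-1-preview",  # mapped to "fal-ai/mochi-v1"
    api_key="hf_xxx",  # hf_ tokens are routed through the Hugging Face proxy
)
# request.json == {"prompt": "A timelapse of a city skyline at night", "num_frames": 16, "seed": 42}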

huggingface_hub/inference/_providers/hf_inference.py (new file)

@@ -0,0 +1,202 @@
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from huggingface_hub.constants import ENDPOINT
+from huggingface_hub.inference._common import RequestParameters, TaskProviderHelper, _b64_encode, _open_as_binary
+from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
+
+
+## RECOMMENDED MODELS
+
+# Will be globally fetched only once (see '_fetch_recommended_models')
+_RECOMMENDED_MODELS: Optional[Dict[str, Optional[str]]] = None
+
+BASE_URL = "https://api-inference.huggingface.co"
+
+
+def _first_or_none(items: List[Any]) -> Optional[Any]:
+    try:
+        return items[0] or None
+    except IndexError:
+        return None
+
+
+def _fetch_recommended_models() -> Dict[str, Optional[str]]:
+    global _RECOMMENDED_MODELS
+    if _RECOMMENDED_MODELS is None:
+        response = get_session().get(f"{ENDPOINT}/api/tasks", headers=build_hf_headers())
+        hf_raise_for_status(response)
+        _RECOMMENDED_MODELS = {
+            task: _first_or_none(details["widgetModels"]) for task, details in response.json().items()
+        }
+    return _RECOMMENDED_MODELS
+
+
+def get_recommended_model(task: str) -> str:
+    """
+    Get the model Hugging Face recommends for the input task.
+
+    Args:
+        task (`str`):
+            The Hugging Face task to get which model Hugging Face recommends.
+            All available tasks can be found [here](https://huggingface.co/tasks).
+
+    Returns:
+        `str`: Name of the model recommended for the input task.
+
+    Raises:
+        `ValueError`: If Hugging Face has no recommendation for the input task.
+    """
+    model = _fetch_recommended_models().get(task)
+    if model is None:
+        raise ValueError(
+            f"Task {task} has no recommended model. Please specify a model"
+            " explicitly. Visit https://huggingface.co/tasks for more info."
+        )
+    return model
+
+
+class HFInferenceTask(TaskProviderHelper):
+    """Base class for HF Inference API tasks."""
+
+    def __init__(self, task: str):
+        self.task = task
+
+    def prepare_request(
+        self,
+        *,
+        inputs: Any,
+        parameters: Dict[str, Any],
+        headers: Dict,
+        model: Optional[str],
+        api_key: Optional[str],
+        extra_payload: Optional[Dict[str, Any]] = None,
+    ) -> RequestParameters:
+        if extra_payload is None:
+            extra_payload = {}
+        mapped_model = self.map_model(model)
+        url = self.build_url(mapped_model)
+        data, json = self._prepare_payload(inputs, parameters=parameters, model=model, extra_payload=extra_payload)
+        headers = self.prepare_headers(headers=headers, api_key=api_key)
+
+        return RequestParameters(
+            url=url,
+            task=self.task,
+            model=mapped_model,
+            json=json,
+            data=data,
+            headers=headers,
+        )
+
+    def map_model(self, model: Optional[str]) -> str:
+        return model if model is not None else get_recommended_model(self.task)
+
+    def build_url(self, model: str) -> str:
+        # hf-inference provider can handle URLs (e.g. Inference Endpoints or TGI deployment)
+        if model.startswith(("http://", "https://")):
+            return model
+
+        return (
+            # Feature-extraction and sentence-similarity are the only cases where we handle models with several tasks.
+            f"{BASE_URL}/pipeline/{self.task}/{model}"
+            if self.task in ("feature-extraction", "sentence-similarity")
+            # Otherwise, we use the default endpoint
+            else f"{BASE_URL}/models/{model}"
+        )
+
+    def prepare_headers(self, headers: Dict, *, api_key: Optional[Union[bool, str]] = None) -> Dict:
+        return {**build_hf_headers(token=api_key), **headers}
+
+    def _prepare_payload(
+        self, inputs: Any, parameters: Dict[str, Any], model: Optional[str], extra_payload: Dict[str, Any]
+    ) -> Tuple[Any, Any]:
+        if isinstance(inputs, bytes):
+            raise ValueError(f"Unexpected binary input for task {self.task}.")
+        if isinstance(inputs, Path):
+            raise ValueError(f"Unexpected path input for task {self.task} (got {inputs})")
+        return None, {
+            "inputs": inputs,
+            "parameters": {k: v for k, v in parameters.items() if v is not None},
+            **extra_payload,
+        }
+
+    def get_response(self, response: Union[bytes, Dict]) -> Any:
+        return response
+
+
+class HFInferenceBinaryInputTask(HFInferenceTask):
+    def _prepare_payload(
+        self, inputs: Any, parameters: Dict[str, Any], model: Optional[str], extra_payload: Dict[str, Any]
+    ) -> Tuple[Any, Any]:
+        parameters = {k: v for k, v in parameters.items() if v is not None}
+        has_parameters = len(parameters) > 0 or len(extra_payload) > 0
+
+        # Raise if not a binary object or a local path or a URL.
+        if not isinstance(inputs, (bytes, Path)) and not isinstance(inputs, str):
+            raise ValueError(f"Expected binary inputs or a local path or a URL. Got {inputs}")
+
+        # Send inputs as raw content when no parameters are provided
+        if not has_parameters:
+            with _open_as_binary(inputs) as data:
+                data_as_bytes = data if isinstance(data, bytes) else data.read()
+                return data_as_bytes, None
+
+        # Otherwise encode as b64
+        return None, {"inputs": _b64_encode(inputs), "parameters": parameters, **extra_payload}
+
+
+class HFInferenceConversational(HFInferenceTask):
+    def __init__(self):
+        super().__init__("text-generation")
+
+    def prepare_request(
+        self,
+        *,
+        inputs: Any,
+        parameters: Dict[str, Any],
+        headers: Dict,
+        model: Optional[str],
+        api_key: Optional[str],
+        extra_payload: Optional[Dict[str, Any]] = None,
+    ) -> RequestParameters:
+        model = self.map_model(model)
+        payload_model = parameters.get("model") or model
+
+        if payload_model is None or payload_model.startswith(("http://", "https://")):
+            payload_model = "tgi"  # use a random string if not provided
+
+        json = {
+            **{key: value for key, value in parameters.items() if value is not None},
+            "model": payload_model,
+            "messages": inputs,
+            **(extra_payload or {}),
+        }
+        headers = self.prepare_headers(headers=headers, api_key=api_key)
+
+        return RequestParameters(
+            url=self.build_url(model),
+            task=self.task,
+            model=model,
+            json=json,
+            data=None,
+            headers=headers,
+        )
+
+    def build_url(self, model: str) -> str:
+        base_url = model if model.startswith(("http://", "https://")) else f"{BASE_URL}/models/{model}"
+        return _build_chat_completion_url(base_url)
+
+
+def _build_chat_completion_url(model_url: str) -> str:
+    # Strip trailing /
+    model_url = model_url.rstrip("/")
+
+    # Append /chat/completions if not already present
+    if model_url.endswith("/v1"):
+        model_url += "/chat/completions"
+
+    # Append /v1/chat/completions if not already present
+    if not model_url.endswith("/chat/completions"):
+        model_url += "/v1/chat/completions"
+
+    return model_url
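
`_build_chat_completion_url` normalizes whatever URL the model resolves to; per the branches above:

from huggingface_hub.inference._providers.hf_inference import _build_chat_completion_url

_build_chat_completion_url("https://api-inference.huggingface.co/models/gpt2")
# -> ".../models/gpt2/v1/chat/completions"

_build_chat_completion_url("https://my-endpoint.example/v1")
# -> "https://my-endpoint.example/v1/chat/completions"

_build_chat_completion_url("https://my-endpoint.example/v1/chat/completions/")
# -> unchanged apart from the stripped trailing slash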