huggingface-hub 0.31.0rc0__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +145 -46
- huggingface_hub/_commit_api.py +168 -119
- huggingface_hub/_commit_scheduler.py +15 -15
- huggingface_hub/_inference_endpoints.py +15 -12
- huggingface_hub/_jobs_api.py +301 -0
- huggingface_hub/_local_folder.py +18 -3
- huggingface_hub/_login.py +31 -63
- huggingface_hub/_oauth.py +460 -0
- huggingface_hub/_snapshot_download.py +239 -80
- huggingface_hub/_space_api.py +5 -5
- huggingface_hub/_tensorboard_logger.py +15 -19
- huggingface_hub/_upload_large_folder.py +172 -76
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +13 -25
- huggingface_hub/{commands → cli}/__init__.py +1 -15
- huggingface_hub/cli/_cli_utils.py +173 -0
- huggingface_hub/cli/auth.py +147 -0
- huggingface_hub/cli/cache.py +841 -0
- huggingface_hub/cli/download.py +189 -0
- huggingface_hub/cli/hf.py +60 -0
- huggingface_hub/cli/inference_endpoints.py +377 -0
- huggingface_hub/cli/jobs.py +772 -0
- huggingface_hub/cli/lfs.py +175 -0
- huggingface_hub/cli/repo.py +315 -0
- huggingface_hub/cli/repo_files.py +94 -0
- huggingface_hub/{commands/env.py → cli/system.py} +10 -13
- huggingface_hub/cli/upload.py +294 -0
- huggingface_hub/cli/upload_large_folder.py +117 -0
- huggingface_hub/community.py +20 -12
- huggingface_hub/constants.py +38 -53
- huggingface_hub/dataclasses.py +609 -0
- huggingface_hub/errors.py +80 -30
- huggingface_hub/fastai_utils.py +30 -41
- huggingface_hub/file_download.py +435 -351
- huggingface_hub/hf_api.py +2050 -1124
- huggingface_hub/hf_file_system.py +269 -152
- huggingface_hub/hub_mixin.py +43 -63
- huggingface_hub/inference/_client.py +347 -434
- huggingface_hub/inference/_common.py +133 -121
- huggingface_hub/inference/_generated/_async_client.py +397 -541
- huggingface_hub/inference/_generated/types/__init__.py +5 -1
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +59 -23
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/__init__.py +0 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
- huggingface_hub/inference/_mcp/agent.py +100 -0
- huggingface_hub/inference/_mcp/cli.py +247 -0
- huggingface_hub/inference/_mcp/constants.py +81 -0
- huggingface_hub/inference/_mcp/mcp_client.py +395 -0
- huggingface_hub/inference/_mcp/types.py +45 -0
- huggingface_hub/inference/_mcp/utils.py +128 -0
- huggingface_hub/inference/_providers/__init__.py +82 -7
- huggingface_hub/inference/_providers/_common.py +129 -27
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cerebras.py +1 -1
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/inference/_providers/cohere.py +20 -3
- huggingface_hub/inference/_providers/fal_ai.py +183 -56
- huggingface_hub/inference/_providers/featherless_ai.py +38 -0
- huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
- huggingface_hub/inference/_providers/groq.py +9 -0
- huggingface_hub/inference/_providers/hf_inference.py +69 -30
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +33 -5
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +44 -0
- huggingface_hub/inference/_providers/openai.py +3 -1
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +31 -13
- huggingface_hub/inference/_providers/sambanova.py +18 -4
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/together.py +20 -5
- huggingface_hub/inference/_providers/wavespeed.py +138 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +33 -100
- huggingface_hub/repocard.py +34 -38
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +12 -15
- huggingface_hub/serialization/_dduf.py +8 -8
- huggingface_hub/serialization/_torch.py +69 -69
- huggingface_hub/utils/__init__.py +19 -8
- huggingface_hub/utils/_auth.py +7 -7
- huggingface_hub/utils/_cache_manager.py +92 -147
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +55 -0
- huggingface_hub/utils/_experimental.py +7 -5
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +5 -5
- huggingface_hub/utils/_headers.py +8 -30
- huggingface_hub/utils/_http.py +398 -239
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +61 -24
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +4 -4
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +55 -74
- huggingface_hub/utils/_verification.py +167 -0
- huggingface_hub/utils/_xet.py +64 -17
- huggingface_hub/utils/_xet_progress_reporting.py +162 -0
- huggingface_hub/utils/insecure_hashlib.py +3 -5
- huggingface_hub/utils/logging.py +8 -11
- huggingface_hub/utils/tqdm.py +5 -4
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -85
- huggingface_hub-1.1.3.dist-info/RECORD +155 -0
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
- huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
- huggingface_hub/commands/delete_cache.py +0 -474
- huggingface_hub/commands/download.py +0 -200
- huggingface_hub/commands/huggingface_cli.py +0 -61
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo_files.py +0 -128
- huggingface_hub/commands/scan_cache.py +0 -181
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -314
- huggingface_hub/commands/upload_large_folder.py +0 -129
- huggingface_hub/commands/user.py +0 -304
- huggingface_hub/commands/version.py +0 -37
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.31.0rc0.dist-info/RECORD +0 -135
- huggingface_hub-0.31.0rc0.dist-info/entry_points.txt +0 -6
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_client.py

@@ -34,18 +34,17 @@
 # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
 import base64
 import logging
+import os
 import re
 import warnings
-from …
-…
-from requests import HTTPError
+from contextlib import ExitStack
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload

 from huggingface_hub import constants
-from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
+from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
-    ModelStatus,
     RequestParameters,
     _b64_encode,
     _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
     _bytes_to_list,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _open_as_binary,
     _set_unsupported_text_generation_kwargs,
     _stream_chat_completion_response,
     _stream_text_generation_response,
@@ -66,6 +64,7 @@ from huggingface_hub.inference._generated.types import (
     AudioToAudioOutputElement,
     AutomaticSpeechRecognitionOutput,
     ChatCompletionInputGrammarType,
+    ChatCompletionInputMessage,
     ChatCompletionInputStreamOptions,
     ChatCompletionInputTool,
     ChatCompletionInputToolChoiceClass,
@@ -80,6 +79,7 @@ from huggingface_hub.inference._generated.types import (
     ImageSegmentationSubtask,
     ImageToImageTargetSize,
     ImageToTextOutput,
+    ImageToVideoTargetSize,
     ObjectDetectionOutputElement,
     Padding,
     QuestionAnsweringOutputElement,
@@ -100,10 +100,14 @@ from huggingface_hub.inference._generated.types import (
     ZeroShotClassificationOutputElement,
     ZeroShotImageClassificationOutputElement,
 )
-from huggingface_hub.inference._providers import …
-from huggingface_hub.utils import …
+from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
+from huggingface_hub.utils import (
+    build_hf_headers,
+    get_session,
+    hf_raise_for_status,
+    validate_hf_hub_args,
+)
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method


 if TYPE_CHECKING:
@@ -129,11 +133,9 @@ class InferenceClient:
             or a URL to a deployed Inference Endpoint. Defaults to None, in which case a recommended model is
             automatically selected for the task.
             Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
-            arguments are mutually exclusive. If …
-            path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
-            documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
+            arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, "sambanova"` or `"…
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str`, *optional*):
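The expanded `provider` choices documented in this hunk can be selected explicitly when constructing the client. A minimal sketch, assuming a Hugging Face token is already configured in the environment; the provider and organization names below are illustrative:

```python
from huggingface_hub import InferenceClient

# Pin a specific inference provider instead of the default "auto" routing policy.
client = InferenceClient(provider="fal-ai")

# Or keep automatic routing but bill requests to an organization (illustrative name).
client = InferenceClient(provider="auto", bill_to="my-org")
```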
@@ -142,16 +144,14 @@ class InferenceClient:
             arguments are mutually exclusive and have the exact same behavior.
         timeout (`float`, `optional`):
             The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-        headers (`…
+        headers (`dict[str, str]`, `optional`):
             Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
             Values in this dictionary will override the default values.
         bill_to (`str`, `optional`):
             The billing account to use for the requests. By default the requests are billed on the user's account.
             Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-        cookies (`…
+        cookies (`dict[str, str]`, `optional`):
             Additional cookies to send to the server.
-        proxies (`Any`, `optional`):
-            Proxies to use for the request.
         base_url (`str`, `optional`):
             Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -160,16 +160,16 @@ class InferenceClient:
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
     """

+    @validate_hf_hub_args
     def __init__(
         self,
         model: Optional[str] = None,
         *,
-        provider: …
+        provider: Optional[PROVIDER_OR_POLICY_T] = None,
         token: Optional[str] = None,
         timeout: Optional[float] = None,
-        headers: Optional[…
-        cookies: Optional[…
-        proxies: Optional[Any] = None,
+        headers: Optional[dict[str, str]] = None,
+        cookies: Optional[dict[str, str]] = None,
         bill_to: Optional[str] = None,
         # OpenAI compatibility
         base_url: Optional[str] = None,
@@ -190,7 +190,7 @@ class InferenceClient:
             )
         token = token if token is not None else api_key
         if isinstance(token, bool):
-            # Legacy behavior: previously …
+            # Legacy behavior: previously it was possible to pass `token=False` to disable authentication. This is not
             # supported anymore as authentication is required. Better to explicitly raise here rather than risking
             # sending the locally saved token without the user knowing about it.
             if token is False:
@@ -231,11 +231,21 @@ class InferenceClient:

         self.cookies = cookies
         self.timeout = timeout
-…
+
+        self.exit_stack = ExitStack()

     def __repr__(self):
         return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.exit_stack.close()
+
+    def close(self):
+        self.exit_stack.close()
+
     @overload
     def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
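The `ExitStack` plus the new `__enter__`/`__exit__`/`close` methods above let the client hold on to streamed HTTP responses and release them deterministically. A minimal usage sketch, assuming a configured token (the model id is illustrative):

```python
from huggingface_hub import InferenceClient

# Context-manager form: anything registered on the client's exit stack
# (e.g. streamed responses) is closed when the block exits.
with InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct") as client:
    out = client.chat_completion(
        messages=[{"role": "user", "content": "Hello!"}],
        max_tokens=16,
    )

# Equivalent explicit form:
client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct")
try:
    out = client.chat_completion(
        messages=[{"role": "user", "content": "Hello!"}],
        max_tokens=16,
    )
finally:
    client.close()
```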
@@ -244,45 +254,46 @@ class InferenceClient:
     @overload
     def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-    ) -> Iterable[…
+    ) -> Iterable[str]: ...

     @overload
     def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, Iterable[…
+    ) -> Union[bytes, Iterable[str]]: ...

     def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, Iterable[…
+    ) -> Union[bytes, Iterable[str]]:
         """Make a request to the inference server."""
         # TODO: this should be handled in provider helpers directly
         if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
             request_parameters.headers["Accept"] = "image/png"

-…
-…
-…
+        try:
+            response = self.exit_stack.enter_context(
+                get_session().stream(
+                    "POST",
                     request_parameters.url,
                     json=request_parameters.json,
-                    data…
+                    content=request_parameters.data,
                     headers=request_parameters.headers,
                     cookies=self.cookies,
                     timeout=self.timeout,
-                    stream=stream,
-                    proxies=self.proxies,
                 )
-…
-            # Convert any `TimeoutError` to a `InferenceTimeoutError`
-            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
-…
-        try:
+            )
             hf_raise_for_status(response)
-…
-…
+            if stream:
+                return response.iter_lines()
+            else:
+                return response.read()
+        except TimeoutError as error:
+            # Convert any `TimeoutError` to a `InferenceTimeoutError`
+            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
+        except HfHubHTTPError as error:
             if error.response.status_code == 422 and request_parameters.task != "unknown":
                 msg = str(error.args[0])
                 if len(error.response.text) > 0:
-                    msg += f"…
+                    msg += f"{os.linesep}{error.response.text}{os.linesep}"
                 error.args = (msg,) + error.args[1:]
             raise

@@ -293,7 +304,7 @@ class InferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-    ) -> …
+    ) -> list[AudioClassificationOutputElement]:
         """
         Perform audio classification on the provided audio content.

@@ -311,12 +322,12 @@ class InferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.

         Returns:
-            ` …
+            `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
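The updated Raises sections document [`HfHubHTTPError`] as the HTTP failure type for these methods. A hedged sketch of handling the two documented error classes (the audio file name is a placeholder):

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError, InferenceTimeoutError

client = InferenceClient()
try:
    labels = client.audio_classification("sample.flac")  # placeholder local file
except InferenceTimeoutError:
    print("Model unavailable or the request timed out.")
except HfHubHTTPError as err:
    # The underlying HTTP response is available on the error, as used in _inner_post above.
    print(f"Request failed with status {err.response.status_code}")
```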
@@ -348,7 +359,7 @@ class InferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-    ) -> …
+    ) -> list[AudioToAudioOutputElement]:
         """
         Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).

@@ -362,12 +373,12 @@ class InferenceClient:
                 audio_to_audio will be used.

         Returns:
-            ` …
+            `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.

         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -400,7 +411,7 @@ class InferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -411,7 +422,7 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-            extra_body (`…
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -420,7 +431,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -446,121 +457,117 @@ class InferenceClient:
     @overload
     def chat_completion(  # type: ignore
         self,
-        messages: …
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[False] = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[…
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[…
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[…
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
     ) -> ChatCompletionOutput: ...

     @overload
     def chat_completion(  # type: ignore
         self,
-        messages: …
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[True] = True,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[…
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[…
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[…
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
     ) -> Iterable[ChatCompletionStreamOutput]: ...

     @overload
     def chat_completion(
         self,
-        messages: …
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[…
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[…
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[…
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...

     def chat_completion(
         self,
-        messages: …
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         # Parameters from ChatCompletionInput (handled manually)
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[…
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[…
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[…
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.

-…
-…
-…
-…
-…
-        for more details about OpenAI's compatibility.
-…
-        </Tip>
+        > [!TIP]
+        > The `client.chat_completion` method is aliased as `client.chat.completions.create` for compatibility with OpenAI's client.
+        > Inputs and outputs are strictly the same and using either syntax will yield the same results.
+        > Check out the [Inference guide](https://huggingface.co/docs/huggingface_hub/guides/inference#openai-compatibility)
+        > for more details about OpenAI's compatibility.

-…
-        You can pass provider-specific parameters to the model by using the `extra_body` argument.
-        </Tip>
+        > [!TIP]
+        > You can pass provider-specific parameters to the model by using the `extra_body` argument.

         Args:
             messages (List of [`ChatCompletionInputMessage`]):
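The tips in this hunk describe the OpenAI-compatible alias and the streaming/`extra_body` options. A minimal sketch of both call styles, assuming a configured token; the model id is illustrative:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# OpenAI-style alias mentioned in the tip above; identical to client.chat_completion(...).
resp = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    messages=[{"role": "user", "content": "Say hi in one word."}],
    max_tokens=8,
)
print(resp.choices[0].message.content)

# Streaming variant: with stream=True the call yields ChatCompletionStreamOutput chunks.
for chunk in client.chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Count to 3."}],
    stream=True,
    max_tokens=16,
):
    print(chunk.choices[0].delta.content or "", end="")
```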
@@ -574,7 +581,7 @@ class InferenceClient:
             frequency_penalty (`float`, *optional*):
                 Penalizes new tokens based on their existing frequency
                 in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-            logit_bias (`…
+            logit_bias (`list[float]`, *optional*):
                 Adjusts the likelihood of specific tokens appearing in the generated output.
             logprobs (`bool`, *optional*):
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -590,7 +597,7 @@ class InferenceClient:
                 Grammar constraints. Can be either a JSONSchema or a regex.
             seed (Optional[`int`], *optional*):
                 Seed for reproducible control flow. Defaults to None.
-            stop (`…
+            stop (`list[str]`, *optional*):
                 Up to four strings which trigger the end of the response.
                 Defaults to None.
             stream (`bool`, *optional*):
@@ -614,7 +621,7 @@ class InferenceClient:
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-            extra_body (`…
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -626,7 +633,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -852,7 +859,7 @@ class InferenceClient:
         >>> messages = [
         ...     {
         ...         "role": "user",
-        ...         "content": "I saw a puppy a cat and a raccoon during my bike ride in the park. What did I …
+        ...         "content": "I saw a puppy a cat and a raccoon during my bike ride in the park. What did I see and when?",
         ...     },
         ... ]
         >>> response_format = {
@@ -940,8 +947,8 @@ class InferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-        word_boxes: Optional[…
-    ) -> …
+        word_boxes: Optional[list[Union[list[float], str]]] = None,
+    ) -> list[DocumentQuestionAnsweringOutputElement]:
         """
         Answer questions on document images.

@@ -971,16 +978,16 @@ class InferenceClient:
             top_k (`int`, *optional*):
                 The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                 answers if there are not enough options available within the context.
-            word_boxes (`…
+            word_boxes (`list[Union[list[float], str`, *optional*):
                 A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                 step and use the provided bounding boxes instead.
         Returns:
-            ` …
+            `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.


@@ -994,7 +1001,7 @@ class InferenceClient:
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-        inputs: …
+        inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
         request_parameters = provider_helper.prepare_request(
             inputs=inputs,
             parameters={
@@ -1055,7 +1062,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1092,9 +1099,9 @@ class InferenceClient:
         text: str,
         *,
         model: Optional[str] = None,
-        targets: Optional[…
+        targets: Optional[list[str]] = None,
         top_k: Optional[int] = None,
-    ) -> …
+    ) -> list[FillMaskOutputElement]:
         """
         Fill in a hole with a missing word (token to be precise).

@@ -1104,20 +1111,20 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`…
+            targets (`list[str`, *optional*):
                 When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                 vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                 resulting token will be used (with a warning, and that might be slower).
             top_k (`int`, *optional*):
                 When passed, overrides the number of predictions to return.
         Returns:
-            ` …
+            `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
             probability, token reference, and completed text.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1150,13 +1157,13 @@ class InferenceClient:
         model: Optional[str] = None,
         function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
         top_k: Optional[int] = None,
-    ) -> …
+    ) -> list[ImageClassificationOutputElement]:
         """
         Perform image classification on the given image using the specified model.

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The image to classify. It can be raw bytes, an image file, …
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The image to classify. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             model (`str`, *optional*):
                 The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
                 deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
@@ -1165,12 +1172,12 @@ class InferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
         Returns:
-            ` …
+            `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1202,19 +1209,16 @@ class InferenceClient:
         overlap_mask_area_threshold: Optional[float] = None,
         subtask: Optional["ImageSegmentationSubtask"] = None,
         threshold: Optional[float] = None,
-    ) -> …
+    ) -> list[ImageSegmentationOutputElement]:
         """
         Perform image segmentation on the given image using the specified model.

-…
-…
-        You must have `PIL` installed if you want to work with images (`pip install Pillow`).
-…
-        </Tip>
+        > [!WARNING]
+        > You must have `PIL` installed if you want to work with images (`pip install Pillow`).

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The image to segment. It can be raw bytes, an image file, …
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The image to segment. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             model (`str`, *optional*):
                 The model to use for image segmentation. Can be a model ID hosted on the Hugging Face Hub or a URL to a
                 deployed Inference Endpoint. If not provided, the default recommended model for image segmentation will be used.
@@ -1227,12 +1231,12 @@ class InferenceClient:
             threshold (`float`, *optional*):
                 Probability threshold to filter out predicted masks.
         Returns:
-            ` …
+            `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1258,6 +1262,7 @@ class InferenceClient:
             api_key=self.token,
         )
         response = self._inner_post(request_parameters)
+        response = provider_helper.get_response(response, request_parameters)
         output = ImageSegmentationOutputElement.parse_obj_as_list(response)
         for item in output:
             item.mask = _b64_to_image(item.mask)  # type: ignore [assignment]
@@ -1278,15 +1283,12 @@ class InferenceClient:
         """
         Perform image-to-image translation using a specified model.

-…
-…
-        You must have `PIL` installed if you want to work with images (`pip install Pillow`).
-…
-        </Tip>
+        > [!WARNING]
+        > You must have `PIL` installed if you want to work with images (`pip install Pillow`).

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The input image for translation. It can be raw bytes, an image file, …
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image for translation. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             prompt (`str`, *optional*):
                 The text prompt to guide the image generation.
             negative_prompt (`str`, *optional*):
@@ -1301,7 +1303,8 @@ class InferenceClient:
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
             target_size (`ImageToImageTargetSize`, *optional*):
-                The size in …
+                The size in pixels of the output image. This parameter is only supported by some providers and for
+                specific models. It will be ignored when unsupported.

         Returns:
             `Image`: The translated image.
@@ -1309,7 +1312,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1319,6 +1322,7 @@ class InferenceClient:
         >>> image = client.image_to_image("cat.jpg", prompt="turn the cat into a tiger")
         >>> image.save("tiger.jpg")
         ```
+
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="image-to-image", model=model_id)
@@ -1337,18 +1341,98 @@ class InferenceClient:
             api_key=self.token,
         )
         response = self._inner_post(request_parameters)
+        response = provider_helper.get_response(response, request_parameters)
         return _bytes_to_image(response)

+    def image_to_video(
+        self,
+        image: ContentT,
+        *,
+        model: Optional[str] = None,
+        prompt: Optional[str] = None,
+        negative_prompt: Optional[str] = None,
+        num_frames: Optional[float] = None,
+        num_inference_steps: Optional[int] = None,
+        guidance_scale: Optional[float] = None,
+        seed: Optional[int] = None,
+        target_size: Optional[ImageToVideoTargetSize] = None,
+        **kwargs,
+    ) -> bytes:
+        """
+        Generate a video from an input image.
+
+        Args:
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
+            model (`str`, *optional*):
+                The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+                Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+            prompt (`str`, *optional*):
+                The text prompt to guide the video generation.
+            negative_prompt (`str`, *optional*):
+                One prompt to guide what NOT to include in video generation.
+            num_frames (`float`, *optional*):
+                The num_frames parameter determines how many video frames are generated.
+            num_inference_steps (`int`, *optional*):
+                For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+                quality image at the expense of slower inference.
+            guidance_scale (`float`, *optional*):
+                For diffusion models. A higher guidance scale value encourages the model to generate videos closely
+                linked to the text prompt at the expense of lower image quality.
+            seed (`int`, *optional*):
+                The seed to use for the video generation.
+            target_size (`ImageToVideoTargetSize`, *optional*):
+                The size in pixel of the output video frames.
+            num_inference_steps (`int`, *optional*):
+                The number of denoising steps. More denoising steps usually lead to a higher quality video at the
+                expense of slower inference.
+            seed (`int`, *optional*):
+                Seed for the random number generator.
+
+        Returns:
+            `bytes`: The generated video.
+
+        Examples:
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient()
+        >>> video = client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
+        >>> with open("tiger.mp4", "wb") as f:
+        ...     f.write(video)
+        ```
+        """
+        model_id = model or self.model
+        provider_helper = get_provider_helper(self.provider, task="image-to-video", model=model_id)
+        request_parameters = provider_helper.prepare_request(
+            inputs=image,
+            parameters={
+                "prompt": prompt,
+                "negative_prompt": negative_prompt,
+                "num_frames": num_frames,
+                "num_inference_steps": num_inference_steps,
+                "guidance_scale": guidance_scale,
+                "seed": seed,
+                "target_size": target_size,
+                **kwargs,
+            },
+            headers=self.headers,
+            model=model_id,
+            api_key=self.token,
+        )
+        response = self._inner_post(request_parameters)
+        response = provider_helper.get_response(response, request_parameters)
+        return response
+
     def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
         """
         Takes an input image and return text.

         Models can have very different outputs depending on your use case (image captioning, optical character recognition
-        (OCR), Pix2Struct, etc). Please have a look to the model card to learn more about a model's specificities.
+        (OCR), Pix2Struct, etc.). Please have a look to the model card to learn more about a model's specificities.

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The input image to caption. It can be raw bytes, an image file, …
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
@@ -1359,7 +1443,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1382,36 +1466,33 @@ class InferenceClient:
             api_key=self.token,
         )
         response = self._inner_post(request_parameters)
-…
-        return …
+        output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+        return output_list[0]

     def object_detection(
         self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-    ) -> …
+    ) -> list[ObjectDetectionOutputElement]:
         """
         Perform object detection on the given image using the specified model.

-…
-…
-        You must have `PIL` installed if you want to work with images (`pip install Pillow`).
-…
-        </Tip>
+        > [!WARNING]
+        > You must have `PIL` installed if you want to work with images (`pip install Pillow`).

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The image to detect objects on. It can be raw bytes, an image file, …
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The image to detect objects on. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             model (`str`, *optional*):
                 The model to use for object detection. Can be a model ID hosted on the Hugging Face Hub or a URL to a
                 deployed Inference Endpoint. If not provided, the default recommended model for object detection (DETR) will be used.
             threshold (`float`, *optional*):
                 The probability necessary to make a prediction.
         Returns:
-            ` …
+            `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If the request output is not a List.
@@ -1449,7 +1530,7 @@ class InferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-    ) -> Union[QuestionAnsweringOutputElement, …
+    ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
         """
         Retrieve the answer to a question from a given text.

@@ -1481,13 +1562,13 @@ class InferenceClient:
                 topk answers if there are not enough options available within the context.

         Returns:
-            Union[`QuestionAnsweringOutputElement`, …
+            Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                 When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                 When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1501,7 +1582,7 @@ class InferenceClient:
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="question-answering", model=model_id)
         request_parameters = provider_helper.prepare_request(
-            inputs=…
+            inputs={"question": question, "context": context},
             parameters={
                 "align_to_words": align_to_words,
                 "doc_stride": doc_stride,
@@ -1511,7 +1592,6 @@ class InferenceClient:
                 "max_seq_len": max_seq_len,
                 "top_k": top_k,
             },
-            extra_payload={"question": question, "context": context},
             headers=self.headers,
             model=model_id,
             api_key=self.token,
@@ -1522,15 +1602,15 @@ class InferenceClient:
         return output

     def sentence_similarity(
-        self, sentence: str, other_sentences: …
-    ) -> …
+        self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+    ) -> list[float]:
         """
         Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.

         Args:
             sentence (`str`):
                 The main sentence to compare to others.
-            other_sentences (`…
+            other_sentences (`list[str]`):
                 The list of sentences to compare to.
             model (`str`, *optional*):
                 The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1538,12 +1618,12 @@ class InferenceClient:
                 Defaults to None.

         Returns:
-            ` …
+            `list[float]`: The embedding representing the input text.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            ` …
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1580,7 +1660,7 @@ class InferenceClient:
|
|
|
1580
1660
|
*,
|
|
1581
1661
|
model: Optional[str] = None,
|
|
1582
1662
|
clean_up_tokenization_spaces: Optional[bool] = None,
|
|
1583
|
-
generate_parameters: Optional[
|
|
1663
|
+
generate_parameters: Optional[dict[str, Any]] = None,
|
|
1584
1664
|
truncation: Optional["SummarizationTruncationStrategy"] = None,
|
|
1585
1665
|
) -> SummarizationOutput:
|
|
1586
1666
|
"""
|
|
@@ -1594,7 +1674,7 @@ class InferenceClient:
|
|
|
1594
1674
|
Inference Endpoint. If not provided, the default recommended model for summarization will be used.
|
|
1595
1675
|
clean_up_tokenization_spaces (`bool`, *optional*):
|
|
1596
1676
|
Whether to clean up the potential extra spaces in the text output.
|
|
1597
|
-
generate_parameters (`
|
|
1677
|
+
generate_parameters (`dict[str, Any]`, *optional*):
|
|
1598
1678
|
Additional parametrization of the text generation algorithm.
|
|
1599
1679
|
truncation (`"SummarizationTruncationStrategy"`, *optional*):
|
|
1600
1680
|
The truncation strategy to use.
|
|
@@ -1604,7 +1684,7 @@ class InferenceClient:
|
|
|
1604
1684
|
Raises:
|
|
1605
1685
|
[`InferenceTimeoutError`]:
|
|
1606
1686
|
If the model is unavailable or the request times out.
|
|
1607
|
-
`
|
|
1687
|
+
[`HfHubHTTPError`]:
|
|
1608
1688
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1609
1689
|
|
|
1610
1690
|
Example:
|
|
@@ -1634,7 +1714,7 @@ class InferenceClient:

     def table_question_answering(
         self,
-        table:
+        table: dict[str, Any],
         query: str,
         *,
         model: Optional[str] = None,
@@ -1669,7 +1749,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1685,9 +1765,8 @@ class InferenceClient:
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="table-question-answering", model=model_id)
         request_parameters = provider_helper.prepare_request(
-            inputs=
+            inputs={"query": query, "table": table},
             parameters={"model": model, "padding": padding, "sequential": sequential, "truncation": truncation},
-            extra_payload={"query": query, "table": table},
             headers=self.headers,
             model=model_id,
             api_key=self.token,
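Besides the `dict[str, Any]` annotation on `table`, the hunk above moves `query`/`table` from `extra_payload` into the `inputs` dict handed to the provider helper; from the caller's perspective nothing changes. A usage sketch (table contents are illustrative):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
table = {
    "Repository": ["Transformers", "Datasets", "Tokenizers"],
    "Stars": ["36542", "4512", "3934"],
}
answer = client.table_question_answering(table, query="How many stars does the transformers repository have?")
print(answer.answer, answer.cells)
```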
@@ -1695,12 +1774,12 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)

-    def tabular_classification(self, table:
+    def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
         """
         Classifying a target category (a group) based on a set of attributes.

         Args:
-            table (`
+            table (`dict[str, Any]`):
                 Set of attributes to classify.
             model (`str`, *optional*):
                 The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1713,7 +1792,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1750,12 +1829,12 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return _bytes_to_list(response)

-    def tabular_regression(self, table:
+    def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
         """
         Predicting a numerical target value given a set of attributes/features in a table.

         Args:
-            table (`
+            table (`dict[str, Any]`):
                 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
             model (`str`, *optional*):
                 The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1768,7 +1847,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
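`tabular_classification` and `tabular_regression` now declare plain `dict[str, Any]` tables and built-in list return types. A sketch with hypothetical model ids (tabular tasks generally require an explicit model):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
table = {
    "fixed_acidity": ["7.4", "7.8"],
    "volatile_acidity": ["0.7", "0.88"],
    "alcohol": ["9.4", "9.8"],
}
labels = client.tabular_classification(table, model="some-user/wine-quality-classifier")  # hypothetical model id -> list[str]
values = client.tabular_regression(table, model="some-user/wine-quality-regressor")  # hypothetical model id -> list[float]
```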
@@ -1807,7 +1886,7 @@ class InferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-    ) ->
+    ) -> list[TextClassificationOutputElement]:
         """
         Perform text classification (e.g. sentiment-analysis) on the given text.

@@ -1824,12 +1903,12 @@ class InferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.

         Returns:
-            `
+            `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
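With the return type now spelled `list[TextClassificationOutputElement]`, iterating over the result is the expected pattern. A short sketch:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
results = client.text_classification("I love this movie!", top_k=2)
for item in results:  # list[TextClassificationOutputElement]
    print(item.label, item.score)
```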
@@ -1859,26 +1938,26 @@ class InferenceClient:
         return TextClassificationOutputElement.parse_obj_as_list(response)[0]  # type: ignore [return-value]

     @overload
-    def text_generation(
+    def text_generation(
         self,
         prompt: str,
         *,
-        details: Literal[
-        stream: Literal[
+        details: Literal[True],
+        stream: Literal[True],
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1886,29 +1965,29 @@ class InferenceClient:
         truncate: Optional[int] = None,
         typical_p: Optional[float] = None,
         watermark: Optional[bool] = None,
-    ) ->
+    ) -> Iterable[TextGenerationStreamOutput]: ...

     @overload
-    def text_generation(
+    def text_generation(
         self,
         prompt: str,
         *,
-        details: Literal[True]
-        stream: Literal[False] =
+        details: Literal[True],
+        stream: Optional[Literal[False]] = None,
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1919,26 +1998,26 @@ class InferenceClient:
     ) -> TextGenerationOutput: ...

     @overload
-    def text_generation(
+    def text_generation(
         self,
         prompt: str,
         *,
-        details: Literal[False] =
-        stream: Literal[True]
+        details: Optional[Literal[False]] = None,
+        stream: Literal[True],
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,  # Manual default value
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1949,26 +2028,26 @@ class InferenceClient:
     ) -> Iterable[str]: ...

     @overload
-    def text_generation(
+    def text_generation(
         self,
         prompt: str,
         *,
-        details: Literal[
-        stream: Literal[
+        details: Optional[Literal[False]] = None,
+        stream: Optional[Literal[False]] = None,
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1976,29 +2055,29 @@ class InferenceClient:
         truncate: Optional[int] = None,
         typical_p: Optional[float] = None,
         watermark: Optional[bool] = None,
-    ) ->
+    ) -> str: ...

     @overload
     def text_generation(
         self,
         prompt: str,
         *,
-        details:
-        stream: bool =
+        details: Optional[bool] = None,
+        stream: Optional[bool] = None,
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2006,28 +2085,28 @@ class InferenceClient:
         truncate: Optional[int] = None,
         typical_p: Optional[float] = None,
         watermark: Optional[bool] = None,
-    ) -> Union[TextGenerationOutput, Iterable[TextGenerationStreamOutput]]: ...
+    ) -> Union[str, TextGenerationOutput, Iterable[str], Iterable[TextGenerationStreamOutput]]: ...

     def text_generation(
         self,
         prompt: str,
         *,
-        details: bool =
-        stream: bool =
+        details: Optional[bool] = None,
+        stream: Optional[bool] = None,
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2039,12 +2118,9 @@ class InferenceClient:
         """
         Given a prompt, generate the following text.

-
-
-
-        It accepts a list of messages instead of a single text prompt and handles the chat templating for you.
-
-        </Tip>
+        > [!TIP]
+        > If you want to generate a response from chat messages, you should use the [`InferenceClient.chat_completion`] method.
+        > It accepts a list of messages instead of a single text prompt and handles the chat templating for you.

         Args:
             prompt (`str`):
@@ -2083,9 +2159,9 @@ class InferenceClient:
                 Whether to prepend the prompt to the generated text
             seed (`int`, *optional*):
                 Random sampling seed
-            stop (`
+            stop (`list[str]`, *optional*):
                 Stop generating tokens if a member of `stop` is generated.
-            stop_sequences (`
+            stop_sequences (`list[str]`, *optional*):
                 Deprecated argument. Use `stop` instead.
             temperature (`float`, *optional*):
                 The value used to module the logits distribution.
@@ -2102,7 +2178,7 @@ class InferenceClient:
             typical_p (`float`, *optional`):
                 Typical Decoding mass
                 See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
-            watermark (`bool`, *optional
+            watermark (`bool`, *optional*):
                 Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)

         Returns:
@@ -2118,7 +2194,7 @@ class InferenceClient:
                 If input values are not valid. No HTTP call is made to the server.
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -2252,7 +2328,7 @@ class InferenceClient:
             "repetition_penalty": repetition_penalty,
             "return_full_text": return_full_text,
             "seed": seed,
-            "stop": stop
+            "stop": stop,
             "temperature": temperature,
             "top_k": top_k,
             "top_n_tokens": top_n_tokens,
@@ -2306,8 +2382,8 @@ class InferenceClient:

         # Handle errors separately for more precise error messages
         try:
-            bytes_output = self._inner_post(request_parameters, stream=stream)
-        except
+            bytes_output = self._inner_post(request_parameters, stream=stream or False)
+        except HfHubHTTPError as e:
             match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
             if isinstance(e, BadRequestError) and match:
                 unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
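The `text_generation` hunks above mainly switch `details`/`stream`/`do_sample`/`return_full_text` to `Optional[...] = None` defaults, type `stop`/`stop_sequences` as `list[str]`, and treat a `None` stream as `False` downstream (`stream=stream or False`). A sketch of the two common call shapes (the model id is illustrative; any warm text-generation model works):

```python
from huggingface_hub import InferenceClient

client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct")  # illustrative model id

# Non-streaming: leaving details/stream at their new None defaults behaves like False -> returns a str.
text = client.text_generation("The huggingface_hub library is ", max_new_tokens=12, stop=["\n"])
print(text)

# Streaming: yields str tokens (or TextGenerationStreamOutput items when details=True).
for token in client.text_generation("Once upon a time", max_new_tokens=12, stream=True):
    print(token, end="")
```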
@@ -2362,20 +2438,16 @@ class InferenceClient:
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
         seed: Optional[int] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.

-
-
-        You must have `PIL` installed if you want to work with images (`pip install Pillow`).
-
-        </Tip>
+        > [!WARNING]
+        > You must have `PIL` installed if you want to work with images (`pip install Pillow`).

-
-        You can pass provider-specific parameters to the model by using the `extra_body` argument.
-        </Tip>
+        > [!TIP]
+        > You can pass provider-specific parameters to the model by using the `extra_body` argument.

         Args:
             prompt (`str`):
@@ -2400,7 +2472,7 @@ class InferenceClient:
                 Override the scheduler with a compatible one.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_body (`
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.

@@ -2410,7 +2482,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -2470,6 +2542,7 @@ class InferenceClient:
         ... )
         >>> image.save("astronaut.png")
         ```
+
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="text-to-image", model=model_id)
@@ -2490,7 +2563,7 @@ class InferenceClient:
             api_key=self.token,
         )
         response = self._inner_post(request_parameters)
-        response = provider_helper.get_response(response)
+        response = provider_helper.get_response(response, request_parameters)
         return _bytes_to_image(response)

     def text_to_video(
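For `text_to_image`, the `<Tip>` HTML admonitions become `> [!WARNING]` / `> [!TIP]` blockquotes, `extra_body` gains a `dict[str, Any]` annotation, and the provider helper now receives the request parameters when post-processing the response. A hedged sketch of the `extra_body` pass-through (the provider name and extra key are illustrative, not part of the documented API):

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai")  # illustrative provider
image = client.text_to_image(
    "An astronaut riding a horse on the moon.",
    extra_body={"output_format": "jpeg"},  # hypothetical provider-specific parameter, forwarded as-is
)
image.save("astronaut.png")  # requires Pillow, as the new warning admonition notes
```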
@@ -2499,18 +2572,17 @@ class InferenceClient:
         *,
         model: Optional[str] = None,
         guidance_scale: Optional[float] = None,
-        negative_prompt: Optional[
+        negative_prompt: Optional[list[str]] = None,
         num_frames: Optional[float] = None,
         num_inference_steps: Optional[int] = None,
         seed: Optional[int] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Generate a video based on a given text.

-
-        You can pass provider-specific parameters to the model by using the `extra_body` argument.
-        </Tip>
+        > [!TIP]
+        > You can pass provider-specific parameters to the model by using the `extra_body` argument.

         Args:
             prompt (`str`):
@@ -2522,7 +2594,7 @@ class InferenceClient:
             guidance_scale (`float`, *optional*):
                 A higher guidance scale value encourages the model to generate videos closely linked to the text
                 prompt, but values too high may cause saturation and other artifacts.
-            negative_prompt (`
+            negative_prompt (`list[str]`, *optional*):
                 One or several prompt to guide what NOT to include in video generation.
             num_frames (`float`, *optional*):
                 The num_frames parameter determines how many video frames are generated.
@@ -2531,7 +2603,7 @@ class InferenceClient:
                 expense of slower inference.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_body (`
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.

@@ -2569,6 +2641,7 @@ class InferenceClient:
         >>> with open("cat.mp4", "wb") as file:
         ...     file.write(video)
         ```
+
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="text-to-video", model=model_id)
@@ -2611,14 +2684,13 @@ class InferenceClient:
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.

-
-        You can pass provider-specific parameters to the model by using the `extra_body` argument.
-        </Tip>
+        > [!TIP]
+        > You can pass provider-specific parameters to the model by using the `extra_body` argument.

         Args:
             text (`str`):
@@ -2673,7 +2745,7 @@ class InferenceClient:
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-            extra_body (`
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -2682,7 +2754,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
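`text_to_video` and `text_to_speech` get the same treatment: `list[str]` / `dict[str, Any]` annotations plus the blockquote tip. A sketch of the audio case (the `extra_body` key is hypothetical and simply forwarded to the provider):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
audio = client.text_to_speech(
    "Hello world",
    extra_body={"voice": "alloy"},  # hypothetical provider-specific option
)
with open("hello.flac", "wb") as f:
    f.write(audio)  # text_to_speech returns raw audio bytes
```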
@@ -2805,9 +2877,9 @@ class InferenceClient:
         *,
         model: Optional[str] = None,
         aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
-        ignore_labels: Optional[
+        ignore_labels: Optional[list[str]] = None,
         stride: Optional[int] = None,
-    ) ->
+    ) -> list[TokenClassificationOutputElement]:
         """
         Perform token classification on the given text.
         Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2821,18 +2893,18 @@ class InferenceClient:
                 Defaults to None.
             aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
                 The strategy used to fuse tokens based on model predictions
-            ignore_labels (`
+            ignore_labels (`list[str`, *optional*):
                 A list of labels to ignore
             stride (`int`, *optional*):
                 The number of overlapping tokens between chunks when splitting the input text.

         Returns:
-            `
+            `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -2883,7 +2955,7 @@ class InferenceClient:
         tgt_lang: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         truncation: Optional["TranslationTruncationStrategy"] = None,
-        generate_parameters: Optional[
+        generate_parameters: Optional[dict[str, Any]] = None,
     ) -> TranslationOutput:
         """
         Convert text from one language to another.
@@ -2908,7 +2980,7 @@ class InferenceClient:
                 Whether to clean up the potential extra spaces in the text output.
             truncation (`"TranslationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
-            generate_parameters (`
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.

         Returns:
@@ -2917,7 +2989,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If only one of the `src_lang` and `tgt_lang` arguments are provided.
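For `token_classification` (and `translation` in the same set of hunks) the changes are again annotation-level. A sketch showing the now-explicit `list[str]` filter and the list return type:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
entities = client.token_classification(
    "My name is Sarah Jessica Parker but you can call me Jessica",
    ignore_labels=["O"],  # plain list[str], matching the new annotation
)
for entity in entities:  # list[TokenClassificationOutputElement]
    print(entity.entity_group, entity.word, entity.score)
```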
@@ -2970,13 +3042,13 @@ class InferenceClient:
         *,
         model: Optional[str] = None,
         top_k: Optional[int] = None,
-    ) ->
+    ) -> list[VisualQuestionAnsweringOutputElement]:
         """
         Answering open-ended questions based on an image.

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The input image for the context. It can be raw bytes, an image file,
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image for the context. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             question (`str`):
                 Question to be answered.
             model (`str`, *optional*):
@@ -2987,12 +3059,12 @@ class InferenceClient:
                 The number of answers to return (will be chosen by order of likelihood). Note that we return less than
                 topk answers if there are not enough options available within the context.
         Returns:
-            `
+            `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -3025,21 +3097,21 @@ class InferenceClient:
     def zero_shot_classification(
         self,
         text: str,
-        candidate_labels:
+        candidate_labels: list[str],
         *,
         multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
-    ) ->
+    ) -> list[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.

         Args:
             text (`str`):
                 The input text to classify.
-            candidate_labels (`
+            candidate_labels (`list[str]`):
                 The set of possible class labels to classify the text into.
-            labels (`
+            labels (`list[str]`, *optional*):
                 (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
             multi_label (`bool`, *optional*):
                 Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3054,12 +3126,12 @@ class InferenceClient:


         Returns:
-            `
+            `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example with `multi_label=False`:
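`zero_shot_classification` now types `candidate_labels` as `list[str]` and returns `list[ZeroShotClassificationOutputElement]`. A sketch (labels and text are illustrative):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
results = client.zero_shot_classification(
    "A new model offers an explanation for how the Galilean satellites formed.",
    candidate_labels=["space & cosmos", "scientific discovery", "microbiology"],
    multi_label=False,
)
for item in results:  # list[ZeroShotClassificationOutputElement]
    print(item.label, item.score)
```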
@@ -3131,22 +3203,22 @@ class InferenceClient:
     def zero_shot_image_classification(
         self,
         image: ContentT,
-        candidate_labels:
+        candidate_labels: list[str],
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
         # deprecated argument
-        labels:
-    ) ->
+        labels: list[str] = None,  # type: ignore
+    ) -> list[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The input image to caption. It can be raw bytes, an image file,
-            candidate_labels (`
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
+            candidate_labels (`list[str]`):
                 The candidate labels for this image
-            labels (`
+            labels (`list[str]`, *optional*):
                 (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3156,12 +3228,12 @@ class InferenceClient:
                 replacing the placeholder with the candidate labels.

         Returns:
-            `
+            `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -3195,102 +3267,7 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-
-        version="0.33.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
-        in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-        ```python
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-
-        # Discover zero-shot-classification models currently deployed
-        >>> models = client.list_deployed_models()
-        >>> models["zero-shot-classification"]
-        ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-        # List from only 1 framework
-        >>> client.list_deployed_models("text-generation-inference")
-        {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Model running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
-    def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+    def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
         """
         Get information about the deployed endpoint.

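The hunk above deletes `list_deployed_models` entirely; the deprecation notice it carried already pointed to `HfApi.list_models(..., inference_provider='...')` as the replacement. A hedged sketch of that suggested path (provider name, task filter and limit are illustrative):

```python
from huggingface_hub import HfApi

api = HfApi()
# List warm models served by a given inference provider, as suggested by the removed deprecation message.
for model in api.list_models(inference_provider="together", pipeline_tag="text-generation", limit=5):
    print(model.id)
```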
@@ -3303,7 +3280,7 @@ class InferenceClient:
                 Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

         Returns:
-            `
+            `dict[str, Any]`: Information about the endpoint.

         Example:
         ```py
@@ -3353,7 +3330,6 @@ class InferenceClient:
         Check the health of the deployed endpoint.

         Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

         Args:
             model (`str`, *optional*):
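The final hunk below removes `get_model_status` (and its `ModelStatus` return) along with the serverless-specific error message in `health_check`; its deprecation notice points to `HfApi.model_info` instead. A hedged sketch of that replacement (the `expand=["inference"]` field is an assumption based on the current `HfApi` documentation, not on this diff):

```python
from huggingface_hub import HfApi

api = HfApi()
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm" when the model is deployed by at least one provider
```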
@@ -3377,75 +3353,12 @@ class InferenceClient:
         if model is None:
             raise ValueError("Model id not provided.")
         if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
         url = model.rstrip("/") + "/health"

         response = get_session().get(url, headers=build_hf_headers(token=self.token))
         return response.status_code == 200

-    @_deprecate_method(
-        version="0.33.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-        ```py
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-        >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
     @property
     def chat(self) -> "ProxyClientChat":
         return ProxyClientChat(self)