huggingface-hub 0.35.0rc0__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +46 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +176 -20
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +10 -14
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +13 -14
- huggingface_hub/_upload_large_folder.py +120 -13
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/_cli_utils.py +2 -2
- huggingface_hub/cli/auth.py +8 -6
- huggingface_hub/cli/cache.py +18 -20
- huggingface_hub/cli/download.py +4 -4
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +599 -22
- huggingface_hub/cli/lfs.py +4 -4
- huggingface_hub/cli/repo.py +11 -7
- huggingface_hub/cli/repo_files.py +2 -2
- huggingface_hub/cli/upload.py +4 -4
- huggingface_hub/cli/upload_large_folder.py +3 -3
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +13 -13
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -19
- huggingface_hub/errors.py +42 -29
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +162 -259
- huggingface_hub/hf_api.py +841 -616
- huggingface_hub/hf_file_system.py +98 -62
- huggingface_hub/hub_mixin.py +37 -57
- huggingface_hub/inference/_client.py +257 -325
- huggingface_hub/inference/_common.py +110 -124
- huggingface_hub/inference/_generated/_async_client.py +307 -432
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +2 -3
- huggingface_hub/inference/_mcp/mcp_client.py +58 -30
- huggingface_hub/inference/_mcp/types.py +10 -7
- huggingface_hub/inference/_mcp/utils.py +11 -7
- huggingface_hub/inference/_providers/__init__.py +4 -2
- huggingface_hub/inference/_providers/_common.py +49 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +52 -21
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +28 -20
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +20 -31
- huggingface_hub/repocard.py +18 -18
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +25 -21
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +4 -4
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +366 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +16 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +32 -11
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
- huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
@@ -21,16 +21,19 @@
 import asyncio
 import base64
 import logging
+import os
 import re
 import warnings
-from …
+from contextlib import AsyncExitStack
+from typing import TYPE_CHECKING, Any, AsyncIterable, Literal, Optional, Union, overload
+
+import httpx
 
 from huggingface_hub import constants
-from huggingface_hub.errors import InferenceTimeoutError
+from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
-    ModelStatus,
     RequestParameters,
     _async_stream_chat_completion_response,
     _async_stream_text_generation_response,
@@ -41,7 +44,6 @@ from huggingface_hub.inference._common import (
     _bytes_to_list,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _open_as_binary,
     _set_unsupported_text_generation_kwargs,
     raise_text_generation_error,
 )
@@ -66,6 +68,7 @@ from huggingface_hub.inference._generated.types import (
     ImageSegmentationSubtask,
     ImageToImageTargetSize,
     ImageToTextOutput,
+    ImageToVideoTargetSize,
     ObjectDetectionOutputElement,
     Padding,
     QuestionAnsweringOutputElement,
@@ -87,16 +90,19 @@ from huggingface_hub.inference._generated.types import (
     ZeroShotImageClassificationOutputElement,
 )
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
-from huggingface_hub.utils import …
+from huggingface_hub.utils import (
+    build_hf_headers,
+    get_async_session,
+    hf_raise_for_status,
+    validate_hf_hub_args,
+)
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method
 
-from .._common import _async_yield_from
+from .._common import _async_yield_from
 
 
 if TYPE_CHECKING:
     import numpy as np
-    from aiohttp import ClientResponse, ClientSession
     from PIL.Image import Image
 
 logger = logging.getLogger(__name__)
@@ -129,18 +135,14 @@ class AsyncInferenceClient:
            arguments are mutually exclusive and have the exact same behavior.
        timeout (`float`, `optional`):
            The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-        headers (`…
+        headers (`dict[str, str]`, `optional`):
            Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
            Values in this dictionary will override the default values.
        bill_to (`str`, `optional`):
            The billing account to use for the requests. By default the requests are billed on the user's account.
            Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-        cookies (`…
+        cookies (`dict[str, str]`, `optional`):
            Additional cookies to send to the server.
-        trust_env ('bool', 'optional'):
-            Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
-        proxies (`Any`, `optional`):
-            Proxies to use for the request.
        base_url (`str`, `optional`):
            Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
            follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -149,6 +151,7 @@ class AsyncInferenceClient:
            follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
    """
 
+    @validate_hf_hub_args
    def __init__(
        self,
        model: Optional[str] = None,
@@ -156,10 +159,8 @@ class AsyncInferenceClient:
        provider: Optional[PROVIDER_OR_POLICY_T] = None,
        token: Optional[str] = None,
        timeout: Optional[float] = None,
-        headers: Optional[…
-        cookies: Optional[…
-        trust_env: bool = False,
-        proxies: Optional[Any] = None,
+        headers: Optional[dict[str, str]] = None,
+        cookies: Optional[dict[str, str]] = None,
        bill_to: Optional[str] = None,
        # OpenAI compatibility
        base_url: Optional[str] = None,
@@ -221,15 +222,36 @@ class AsyncInferenceClient:
 
        self.cookies = cookies
        self.timeout = timeout
-        self.trust_env = trust_env
-        self.proxies = proxies
 
-        …
-        self.…
+        self.exit_stack = AsyncExitStack()
+        self._async_client: Optional[httpx.AsyncClient] = None
 
    def __repr__(self):
        return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
 
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        await self.close()
+
+    async def close(self):
+        """Close the client.
+
+        This method is automatically called when using the client as a context manager.
+        """
+        await self.exit_stack.aclose()
+
+    async def _get_async_client(self):
+        """Get a unique async client for this AsyncInferenceClient instance.
+
+        Returns the same client instance on subsequent calls, ensuring proper
+        connection reuse and resource management through the exit stack.
+        """
+        if self._async_client is None:
+            self._async_client = await self.exit_stack.enter_async_context(get_async_session())
+        return self._async_client
+
    @overload
    async def _inner_post(  # type: ignore[misc]
        self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
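The 1.0.0 client drops the per-call `aiohttp` sessions in favor of a single `httpx.AsyncClient` managed through an `AsyncExitStack`, so the recommended pattern is to use the client as an async context manager (or call `close()` explicitly). A minimal usage sketch based on the methods added above; the model ID is only an example:

```python
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    # __aenter__/__aexit__ (added above) close the shared httpx client for us.
    async with AsyncInferenceClient() as client:
        out = await client.chat_completion(
            messages=[{"role": "user", "content": "Hello!"}],
            model="meta-llama/Llama-3.1-8B-Instruct",  # example model ID
            max_tokens=32,
        )
        print(out.choices[0].message.content)
    # Without a context manager, call `await client.close()` when done.

asyncio.run(main())
```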
@@ -238,83 +260,59 @@ class AsyncInferenceClient:
    @overload
    async def _inner_post(  # type: ignore[misc]
        self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-    ) -> AsyncIterable[…
+    ) -> AsyncIterable[str]: ...
 
    @overload
    async def _inner_post(
        self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, AsyncIterable[…
+    ) -> Union[bytes, AsyncIterable[str]]: ...
 
    async def _inner_post(
        self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, AsyncIterable[…
+    ) -> Union[bytes, AsyncIterable[str]]:
        """Make a request to the inference server."""
 
-        aiohttp = _import_aiohttp()
-
        # TODO: this should be handled in provider helpers directly
        if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
            request_parameters.headers["Accept"] = "image/png"
 
-        … (8 removed lines, content not captured in this diff view)
+        try:
+            client = await self._get_async_client()
+            if stream:
+                response = await self.exit_stack.enter_async_context(
+                    client.stream(
+                        "POST",
+                        request_parameters.url,
+                        json=request_parameters.json,
+                        data=request_parameters.data,
+                        headers=request_parameters.headers,
+                        cookies=self.cookies,
+                        timeout=self.timeout,
+                    )
                )
-        … (23 removed lines, content not captured in this diff view)
-                raise
-
-    async def __aenter__(self):
-        return self
-
-    async def __aexit__(self, exc_type, exc_value, traceback):
-        await self.close()
-
-    def __del__(self):
-        if len(self._sessions) > 0:
-            warnings.warn(
-                "Deleting 'AsyncInferenceClient' client but some sessions are still open. "
-                "This can happen if you've stopped streaming data from the server before the stream was complete. "
-                "To close the client properly, you must call `await client.close()` "
-                "or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
-            )
-
-    async def close(self):
-        """Close all open sessions.
-
-        By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
-        are streaming data from the server and you stop before the stream is complete, you must call this method to
-        close the session properly.
-
-        Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
-        """
-        await asyncio.gather(*[session.close() for session in self._sessions.keys()])
+                hf_raise_for_status(response)
+                return _async_yield_from(client, response)
+            else:
+                response = await client.post(
+                    request_parameters.url,
+                    json=request_parameters.json,
+                    data=request_parameters.data,
+                    headers=request_parameters.headers,
+                    cookies=self.cookies,
+                    timeout=self.timeout,
+                )
+                hf_raise_for_status(response)
+                return response.content
+        except asyncio.TimeoutError as error:
+            # Convert any `TimeoutError` to a `InferenceTimeoutError`
+            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
+        except HfHubHTTPError as error:
+            if error.response.status_code == 422 and request_parameters.task != "unknown":
+                msg = str(error.args[0])
+                if len(error.response.text) > 0:
+                    msg += f"{os.linesep}{error.response.text}{os.linesep}"
+                error.args = (msg,) + error.args[1:]
+            raise
 
    async def audio_classification(
        self,
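With `aiohttp` gone, HTTP failures now surface as `huggingface_hub.errors.HfHubHTTPError` (raised via `hf_raise_for_status`) instead of `aiohttp.ClientResponseError`. A hedged sketch of what calling code migrating to 1.0 might look like:

```python
from huggingface_hub import AsyncInferenceClient
from huggingface_hub.errors import HfHubHTTPError, InferenceTimeoutError

async def classify(audio_path: str):
    async with AsyncInferenceClient(timeout=30) as client:
        try:
            return await client.audio_classification(audio_path)
        except InferenceTimeoutError:
            # model unavailable or request timed out
            return None
        except HfHubHTTPError as err:
            # any other HTTP error status; details are available on err.response
            print(err.response.status_code, err.response.text)
            raise
```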
@@ -323,7 +321,7 @@ class AsyncInferenceClient:
        model: Optional[str] = None,
        top_k: Optional[int] = None,
        function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-    ) -> …
+    ) -> list[AudioClassificationOutputElement]:
        """
        Perform audio classification on the provided audio content.
 
@@ -341,12 +339,12 @@ class AsyncInferenceClient:
                The function to apply to the model outputs in order to retrieve the scores.
 
        Returns:
-            `…
+            `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
 
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -379,7 +377,7 @@ class AsyncInferenceClient:
        audio: ContentT,
        *,
        model: Optional[str] = None,
-    ) -> …
+    ) -> list[AudioToAudioOutputElement]:
        """
        Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
 
@@ -393,12 +391,12 @@ class AsyncInferenceClient:
                audio_to_audio will be used.
 
        Returns:
-            `…
+            `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
 
        Raises:
            `InferenceTimeoutError`:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -432,7 +430,7 @@ class AsyncInferenceClient:
        audio: ContentT,
        *,
        model: Optional[str] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
    ) -> AutomaticSpeechRecognitionOutput:
        """
        Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -443,7 +441,7 @@ class AsyncInferenceClient:
            model (`str`, *optional*):
                The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-            extra_body (`…
+            extra_body (`dict`, *optional*):
                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                for supported parameters.
        Returns:
@@ -452,7 +450,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -479,105 +477,105 @@ class AsyncInferenceClient:
    @overload
    async def chat_completion(  # type: ignore
        self,
-        messages: …
+        messages: list[Union[dict, ChatCompletionInputMessage]],
        *,
        model: Optional[str] = None,
        stream: Literal[False] = False,
        frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[…
+        logit_bias: Optional[list[float]] = None,
        logprobs: Optional[bool] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ChatCompletionInputGrammarType] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
+        stop: Optional[list[str]] = None,
        stream_options: Optional[ChatCompletionInputStreamOptions] = None,
        temperature: Optional[float] = None,
        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
        tool_prompt: Optional[str] = None,
-        tools: Optional[…
+        tools: Optional[list[ChatCompletionInputTool]] = None,
        top_logprobs: Optional[int] = None,
        top_p: Optional[float] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
    ) -> ChatCompletionOutput: ...
 
    @overload
    async def chat_completion(  # type: ignore
        self,
-        messages: …
+        messages: list[Union[dict, ChatCompletionInputMessage]],
        *,
        model: Optional[str] = None,
        stream: Literal[True] = True,
        frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[…
+        logit_bias: Optional[list[float]] = None,
        logprobs: Optional[bool] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ChatCompletionInputGrammarType] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
+        stop: Optional[list[str]] = None,
        stream_options: Optional[ChatCompletionInputStreamOptions] = None,
        temperature: Optional[float] = None,
        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
        tool_prompt: Optional[str] = None,
-        tools: Optional[…
+        tools: Optional[list[ChatCompletionInputTool]] = None,
        top_logprobs: Optional[int] = None,
        top_p: Optional[float] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
    ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
 
    @overload
    async def chat_completion(
        self,
-        messages: …
+        messages: list[Union[dict, ChatCompletionInputMessage]],
        *,
        model: Optional[str] = None,
        stream: bool = False,
        frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[…
+        logit_bias: Optional[list[float]] = None,
        logprobs: Optional[bool] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ChatCompletionInputGrammarType] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
+        stop: Optional[list[str]] = None,
        stream_options: Optional[ChatCompletionInputStreamOptions] = None,
        temperature: Optional[float] = None,
        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
        tool_prompt: Optional[str] = None,
-        tools: Optional[…
+        tools: Optional[list[ChatCompletionInputTool]] = None,
        top_logprobs: Optional[int] = None,
        top_p: Optional[float] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
    ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
 
    async def chat_completion(
        self,
-        messages: …
+        messages: list[Union[dict, ChatCompletionInputMessage]],
        *,
        model: Optional[str] = None,
        stream: bool = False,
        # Parameters from ChatCompletionInput (handled manually)
        frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[…
+        logit_bias: Optional[list[float]] = None,
        logprobs: Optional[bool] = None,
        max_tokens: Optional[int] = None,
        n: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        response_format: Optional[ChatCompletionInputGrammarType] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
+        stop: Optional[list[str]] = None,
        stream_options: Optional[ChatCompletionInputStreamOptions] = None,
        temperature: Optional[float] = None,
        tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
        tool_prompt: Optional[str] = None,
-        tools: Optional[…
+        tools: Optional[list[ChatCompletionInputTool]] = None,
        top_logprobs: Optional[int] = None,
        top_p: Optional[float] = None,
-        extra_body: Optional[…
+        extra_body: Optional[dict] = None,
    ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
        """
        A method for completing conversations using a specified language model.
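Most of the signature changes in this hunk are the move from `typing.List`/`typing.Dict` annotations to built-in generics (`list[...]`, `dict`), so existing call sites keep working. A hedged sketch of a streaming call that uses the `list`-typed `stop` parameter (the model ID is illustrative):

```python
from huggingface_hub import AsyncInferenceClient

async def stream_reply(prompt: str) -> str:
    async with AsyncInferenceClient(model="meta-llama/Llama-3.1-8B-Instruct") as client:
        chunks: list[str] = []
        # stream=True yields ChatCompletionStreamOutput items; stop takes a plain list[str]
        async for chunk in await client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            max_tokens=64,
            stop=["\n\n"],
        ):
            chunks.append(chunk.choices[0].delta.content or "")
        return "".join(chunks)
```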
@@ -607,7 +605,7 @@ class AsyncInferenceClient:
            frequency_penalty (`float`, *optional*):
                Penalizes new tokens based on their existing frequency
                in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-            logit_bias (`…
+            logit_bias (`list[float]`, *optional*):
                Adjusts the likelihood of specific tokens appearing in the generated output.
            logprobs (`bool`, *optional*):
                Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -623,7 +621,7 @@ class AsyncInferenceClient:
                Grammar constraints. Can be either a JSONSchema or a regex.
            seed (Optional[`int`], *optional*):
                Seed for reproducible control flow. Defaults to None.
-            stop (`…
+            stop (`list[str]`, *optional*):
                Up to four strings which trigger the end of the response.
                Defaults to None.
            stream (`bool`, *optional*):
@@ -647,7 +645,7 @@ class AsyncInferenceClient:
            tools (List of [`ChatCompletionInputTool`], *optional*):
                A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                provide a list of functions the model may generate JSON inputs for.
-            extra_body (`…
+            extra_body (`dict`, *optional*):
                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                for supported parameters.
        Returns:
@@ -659,7 +657,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -979,8 +977,8 @@ class AsyncInferenceClient:
        max_question_len: Optional[int] = None,
        max_seq_len: Optional[int] = None,
        top_k: Optional[int] = None,
-        word_boxes: Optional[…
-    ) -> …
+        word_boxes: Optional[list[Union[list[float], str]]] = None,
+    ) -> list[DocumentQuestionAnsweringOutputElement]:
        """
        Answer questions on document images.
 
@@ -1010,16 +1008,16 @@ class AsyncInferenceClient:
            top_k (`int`, *optional*):
                The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                answers if there are not enough options available within the context.
-            word_boxes (`…
+            word_boxes (`list[Union[list[float], str`, *optional*):
                A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                step and use the provided bounding boxes instead.
        Returns:
-            `…
+            `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
 
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
 
@@ -1034,7 +1032,7 @@ class AsyncInferenceClient:
        """
        model_id = model or self.model
        provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-        inputs: …
+        inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
        request_parameters = provider_helper.prepare_request(
            inputs=inputs,
            parameters={
@@ -1095,7 +1093,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1133,9 +1131,9 @@ class AsyncInferenceClient:
        text: str,
        *,
        model: Optional[str] = None,
-        targets: Optional[…
+        targets: Optional[list[str]] = None,
        top_k: Optional[int] = None,
-    ) -> …
+    ) -> list[FillMaskOutputElement]:
        """
        Fill in a hole with a missing word (token to be precise).
 
@@ -1145,20 +1143,20 @@ class AsyncInferenceClient:
            model (`str`, *optional*):
                The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`…
+            targets (`list[str`, *optional*):
                When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                resulting token will be used (with a warning, and that might be slower).
            top_k (`int`, *optional*):
                When passed, overrides the number of predictions to return.
        Returns:
-            `…
+            `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
            probability, token reference, and completed text.
 
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1192,7 +1190,7 @@ class AsyncInferenceClient:
        model: Optional[str] = None,
        function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
        top_k: Optional[int] = None,
-    ) -> …
+    ) -> list[ImageClassificationOutputElement]:
        """
        Perform image classification on the given image using the specified model.
 
@@ -1207,12 +1205,12 @@ class AsyncInferenceClient:
            top_k (`int`, *optional*):
                When specified, limits the output to the top K most probable classes.
        Returns:
-            `…
+            `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
 
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1245,7 +1243,7 @@ class AsyncInferenceClient:
        overlap_mask_area_threshold: Optional[float] = None,
        subtask: Optional["ImageSegmentationSubtask"] = None,
        threshold: Optional[float] = None,
-    ) -> …
+    ) -> list[ImageSegmentationOutputElement]:
        """
        Perform image segmentation on the given image using the specified model.
 
@@ -1270,12 +1268,12 @@ class AsyncInferenceClient:
            threshold (`float`, *optional*):
                Probability threshold to filter out predicted masks.
        Returns:
-            `…
+            `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
 
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1353,7 +1351,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1385,6 +1383,86 @@ class AsyncInferenceClient:
        response = provider_helper.get_response(response, request_parameters)
        return _bytes_to_image(response)
 
+    async def image_to_video(
+        self,
+        image: ContentT,
+        *,
+        model: Optional[str] = None,
+        prompt: Optional[str] = None,
+        negative_prompt: Optional[str] = None,
+        num_frames: Optional[float] = None,
+        num_inference_steps: Optional[int] = None,
+        guidance_scale: Optional[float] = None,
+        seed: Optional[int] = None,
+        target_size: Optional[ImageToVideoTargetSize] = None,
+        **kwargs,
+    ) -> bytes:
+        """
+        Generate a video from an input image.
+
+        Args:
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
+            model (`str`, *optional*):
+                The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+                Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+            prompt (`str`, *optional*):
+                The text prompt to guide the video generation.
+            negative_prompt (`str`, *optional*):
+                One prompt to guide what NOT to include in video generation.
+            num_frames (`float`, *optional*):
+                The num_frames parameter determines how many video frames are generated.
+            num_inference_steps (`int`, *optional*):
+                For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+                quality image at the expense of slower inference.
+            guidance_scale (`float`, *optional*):
+                For diffusion models. A higher guidance scale value encourages the model to generate videos closely
+                linked to the text prompt at the expense of lower image quality.
+            seed (`int`, *optional*):
+                The seed to use for the video generation.
+            target_size (`ImageToVideoTargetSize`, *optional*):
+                The size in pixel of the output video frames.
+            num_inference_steps (`int`, *optional*):
+                The number of denoising steps. More denoising steps usually lead to a higher quality video at the
+                expense of slower inference.
+            seed (`int`, *optional*):
+                Seed for the random number generator.
+
+        Returns:
+            `bytes`: The generated video.
+
+        Examples:
+        ```py
+        # Must be run in an async context
+        >>> from huggingface_hub import AsyncInferenceClient
+        >>> client = AsyncInferenceClient()
+        >>> video = await client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
+        >>> with open("tiger.mp4", "wb") as f:
+        ...     f.write(video)
+        ```
+        """
+        model_id = model or self.model
+        provider_helper = get_provider_helper(self.provider, task="image-to-video", model=model_id)
+        request_parameters = provider_helper.prepare_request(
+            inputs=image,
+            parameters={
+                "prompt": prompt,
+                "negative_prompt": negative_prompt,
+                "num_frames": num_frames,
+                "num_inference_steps": num_inference_steps,
+                "guidance_scale": guidance_scale,
+                "seed": seed,
+                "target_size": target_size,
+                **kwargs,
+            },
+            headers=self.headers,
+            model=model_id,
+            api_key=self.token,
+        )
+        response = await self._inner_post(request_parameters)
+        response = provider_helper.get_response(response, request_parameters)
+        return response
+
    async def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
        """
        Takes an input image and return text.
@@ -1405,7 +1483,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1429,12 +1507,12 @@ class AsyncInferenceClient:
            api_key=self.token,
        )
        response = await self._inner_post(request_parameters)
-
-        return …
+        output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+        return output_list[0]
 
    async def object_detection(
        self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-    ) -> …
+    ) -> list[ObjectDetectionOutputElement]:
        """
        Perform object detection on the given image using the specified model.
 
@@ -1453,12 +1531,12 @@ class AsyncInferenceClient:
            threshold (`float`, *optional*):
                The probability necessary to make a prediction.
        Returns:
-            `…
+            `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
 
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
            `ValueError`:
                If the request output is not a List.
@@ -1497,7 +1575,7 @@ class AsyncInferenceClient:
        max_question_len: Optional[int] = None,
        max_seq_len: Optional[int] = None,
        top_k: Optional[int] = None,
-    ) -> Union[QuestionAnsweringOutputElement, …
+    ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
        """
        Retrieve the answer to a question from a given text.
 
@@ -1529,13 +1607,13 @@ class AsyncInferenceClient:
                topk answers if there are not enough options available within the context.
 
        Returns:
-            Union[`QuestionAnsweringOutputElement`, …
+            Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1570,15 +1648,15 @@ class AsyncInferenceClient:
        return output
 
    async def sentence_similarity(
-        self, sentence: str, other_sentences: …
-    ) -> …
+        self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+    ) -> list[float]:
        """
        Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
 
        Args:
            sentence (`str`):
                The main sentence to compare to others.
-            other_sentences (`…
+            other_sentences (`list[str]`):
                The list of sentences to compare to.
            model (`str`, *optional*):
                The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1586,12 +1664,12 @@ class AsyncInferenceClient:
                Defaults to None.
 
        Returns:
-            `…
+            `list[float]`: The embedding representing the input text.
 
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1629,7 +1707,7 @@ class AsyncInferenceClient:
        *,
        model: Optional[str] = None,
        clean_up_tokenization_spaces: Optional[bool] = None,
-        generate_parameters: Optional[…
+        generate_parameters: Optional[dict[str, Any]] = None,
        truncation: Optional["SummarizationTruncationStrategy"] = None,
    ) -> SummarizationOutput:
        """
@@ -1643,7 +1721,7 @@ class AsyncInferenceClient:
                Inference Endpoint. If not provided, the default recommended model for summarization will be used.
            clean_up_tokenization_spaces (`bool`, *optional*):
                Whether to clean up the potential extra spaces in the text output.
-            generate_parameters (`…
+            generate_parameters (`dict[str, Any]`, *optional*):
                Additional parametrization of the text generation algorithm.
            truncation (`"SummarizationTruncationStrategy"`, *optional*):
                The truncation strategy to use.
@@ -1653,7 +1731,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1684,7 +1762,7 @@ class AsyncInferenceClient:
 
    async def table_question_answering(
        self,
-        table: …
+        table: dict[str, Any],
        query: str,
        *,
        model: Optional[str] = None,
@@ -1719,7 +1797,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1745,12 +1823,12 @@ class AsyncInferenceClient:
        response = await self._inner_post(request_parameters)
        return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
 
-    async def tabular_classification(self, table: …
+    async def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
        """
        Classifying a target category (a group) based on a set of attributes.
 
        Args:
-            table (`…
+            table (`dict[str, Any]`):
                Set of attributes to classify.
            model (`str`, *optional*):
                The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1763,7 +1841,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1801,12 +1879,12 @@ class AsyncInferenceClient:
        response = await self._inner_post(request_parameters)
        return _bytes_to_list(response)
 
-    async def tabular_regression(self, table: …
+    async def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
        """
        Predicting a numerical target value given a set of attributes/features in a table.
 
        Args:
-            table (`…
+            table (`dict[str, Any]`):
                Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
            model (`str`, *optional*):
                The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1819,7 +1897,7 @@ class AsyncInferenceClient:
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1859,7 +1937,7 @@ class AsyncInferenceClient:
        model: Optional[str] = None,
        top_k: Optional[int] = None,
        function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-    ) -> …
+    ) -> list[TextClassificationOutputElement]:
        """
        Perform text classification (e.g. sentiment-analysis) on the given text.
 
@@ -1876,12 +1954,12 @@ class AsyncInferenceClient:
                The function to apply to the model outputs in order to retrieve the scores.
 
        Returns:
-            `…
+            `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
 
        Raises:
            [`InferenceTimeoutError`]:
                If the model is unavailable or the request times out.
-            `…
+            [`HfHubHTTPError`]:
                If the request fails with an HTTP error status code other than HTTP 503.
 
        Example:
@@ -1930,8 +2008,8 @@ class AsyncInferenceClient:
        repetition_penalty: Optional[float] = None,
        return_full_text: Optional[bool] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
-        stop_sequences: Optional[…
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_n_tokens: Optional[int] = None,
@@ -1960,8 +2038,8 @@ class AsyncInferenceClient:
        repetition_penalty: Optional[float] = None,
        return_full_text: Optional[bool] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
-        stop_sequences: Optional[…
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_n_tokens: Optional[int] = None,
@@ -1990,8 +2068,8 @@ class AsyncInferenceClient:
        repetition_penalty: Optional[float] = None,
        return_full_text: Optional[bool] = None,  # Manual default value
        seed: Optional[int] = None,
-        stop: Optional[…
-        stop_sequences: Optional[…
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_n_tokens: Optional[int] = None,
@@ -2020,8 +2098,8 @@ class AsyncInferenceClient:
        repetition_penalty: Optional[float] = None,
        return_full_text: Optional[bool] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
-        stop_sequences: Optional[…
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_n_tokens: Optional[int] = None,
@@ -2050,8 +2128,8 @@ class AsyncInferenceClient:
        repetition_penalty: Optional[float] = None,
        return_full_text: Optional[bool] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
-        stop_sequences: Optional[…
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_n_tokens: Optional[int] = None,
@@ -2079,8 +2157,8 @@ class AsyncInferenceClient:
        repetition_penalty: Optional[float] = None,
        return_full_text: Optional[bool] = None,
        seed: Optional[int] = None,
-        stop: Optional[…
-        stop_sequences: Optional[…
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_n_tokens: Optional[int] = None,
@@ -2136,9 +2214,9 @@ class AsyncInferenceClient:
                Whether to prepend the prompt to the generated text
            seed (`int`, *optional*):
                Random sampling seed
-            stop (`…
+            stop (`list[str]`, *optional*):
                Stop generating tokens if a member of `stop` is generated.
-            stop_sequences (`…
+            stop_sequences (`list[str]`, *optional*):
                Deprecated argument. Use `stop` instead.
            temperature (`float`, *optional*):
                The value used to module the logits distribution.
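`stop_sequences` is kept only as a deprecated alias here; new code should pass `stop` directly. A hedged migration sketch (the model ID is illustrative):

```python
from huggingface_hub import AsyncInferenceClient

async def generate(prompt: str) -> str:
    async with AsyncInferenceClient(model="HuggingFaceH4/zephyr-7b-beta") as client:
        # before: text_generation(..., stop_sequences=["\n"])  (deprecated)
        # after:  pass the same strings via `stop`
        return await client.text_generation(prompt, max_new_tokens=50, stop=["\n"])
```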
@@ -2159,10 +2237,10 @@ class AsyncInferenceClient:
|
|
|
2159
2237
|
Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
|
|
2160
2238
|
|
|
2161
2239
|
Returns:
|
|
2162
|
-
`Union[str, TextGenerationOutput,
|
|
2240
|
+
`Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
|
|
2163
2241
|
Generated text returned from the server:
|
|
2164
2242
|
- if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
|
|
2165
|
-
- if `stream=True` and `details=False`, the generated text is returned token by token as a `
|
|
2243
|
+
- if `stream=True` and `details=False`, the generated text is returned token by token as a `AsyncIterable[str]`
|
|
2166
2244
|
- if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
|
|
2167
2245
|
- if `details=True` and `stream=True`, the generated text is returned token by token as a iterable of [`~huggingface_hub.TextGenerationStreamOutput`]
|
|
2168
2246
|
|
|
@@ -2171,7 +2249,7 @@ class AsyncInferenceClient:
|
|
|
2171
2249
|
If input values are not valid. No HTTP call is made to the server.
|
|
2172
2250
|
[`InferenceTimeoutError`]:
|
|
2173
2251
|
If the model is unavailable or the request times out.
|
|
2174
|
-
`
|
|
2252
|
+
[`HfHubHTTPError`]:
|
|
2175
2253
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2176
2254
|
|
|
2177
2255
|
Example:
|
|
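The `stop` parameter supersedes the deprecated `stop_sequences`. A minimal migration sketch, assuming a text-generation model is reachable (the model ID and prompt below are placeholders):

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient(model="HuggingFaceH4/zephyr-7b-beta")  # placeholder model
    # Previously: stop_sequences=["\n\n"] (still accepted, but deprecated)
    text = await client.text_generation("The capital of France is", max_new_tokens=10, stop=["\n\n"])
    print(text)


asyncio.run(main())
```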
@@ -2361,9 +2439,9 @@ class AsyncInferenceClient:
         # Handle errors separately for more precise error messages
         try:
             bytes_output = await self._inner_post(request_parameters, stream=stream or False)
-        except
-            match = MODEL_KWARGS_NOT_USED_REGEX.search(e
-            if e
+        except HfHubHTTPError as e:
+            match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
+            if isinstance(e, BadRequestError) and match:
                 unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
                 _set_unsupported_text_generation_kwargs(model, unused_params)
                 return await self.text_generation(  # type: ignore
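Callers can now rely on `HfHubHTTPError` (and its subclass `BadRequestError`) for request failures, as listed in the Raises sections above. A short sketch of catching both, with a placeholder model ID:

```py
from huggingface_hub import InferenceClient
from huggingface_hub.errors import BadRequestError, HfHubHTTPError

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")  # placeholder model
try:
    print(client.text_generation("Hello", max_new_tokens=5))
except BadRequestError as e:
    # Invalid or unused parameters rejected by the server (subclass of HfHubHTTPError)
    print("Bad request:", e)
except HfHubHTTPError as e:
    # Any other HTTP error raised while calling the provider
    print("HTTP error:", e)
```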
@@ -2416,7 +2494,7 @@ class AsyncInferenceClient:
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
         seed: Optional[int] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.
@@ -2454,7 +2532,7 @@ class AsyncInferenceClient:
                 Override the scheduler with a compatible one.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_body (`
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.

@@ -2464,7 +2542,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
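`extra_body` is a plain `dict[str, Any]` forwarded to the provider. A sketch of `text_to_image` with a provider-specific field; the provider name, model ID, and the `num_images` key are illustrative assumptions, not part of this diff:

```py
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai")  # illustrative provider
image = client.text_to_image(
    "An astronaut riding a horse on the moon",
    model="black-forest-labs/FLUX.1-dev",  # placeholder model
    seed=42,
    extra_body={"num_images": 1},  # hypothetical provider-specific field
)
image.save("astronaut.png")  # `text_to_image` returns a PIL `Image`
```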
@@ -2554,11 +2632,11 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         guidance_scale: Optional[float] = None,
-        negative_prompt: Optional[
+        negative_prompt: Optional[list[str]] = None,
         num_frames: Optional[float] = None,
         num_inference_steps: Optional[int] = None,
         seed: Optional[int] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Generate a video based on a given text.
@@ -2577,7 +2655,7 @@ class AsyncInferenceClient:
             guidance_scale (`float`, *optional*):
                 A higher guidance scale value encourages the model to generate videos closely linked to the text
                 prompt, but values too high may cause saturation and other artifacts.
-            negative_prompt (`
+            negative_prompt (`list[str]`, *optional*):
                 One or several prompt to guide what NOT to include in video generation.
             num_frames (`float`, *optional*):
                 The num_frames parameter determines how many video frames are generated.
@@ -2586,7 +2664,7 @@ class AsyncInferenceClient:
                 expense of slower inference.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_body (`
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.

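`text_to_video` returns raw bytes and accepts `negative_prompt` as a `list[str]`. A usage sketch with placeholder provider and model names:

```py
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai")  # illustrative provider
video = client.text_to_video(
    "A panda eating bamboo in the rain",
    model="Wan-AI/Wan2.1-T2V-14B",  # placeholder model
    negative_prompt=["blurry", "low quality"],
    num_inference_steps=25,
)
with open("panda.mp4", "wb") as f:
    f.write(video)  # raw bytes, per the `-> bytes` annotation
```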
@@ -2666,7 +2744,7 @@ class AsyncInferenceClient:
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.
@@ -2728,7 +2806,7 @@ class AsyncInferenceClient:
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-            extra_body (`
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -2737,7 +2815,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
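`text_to_speech` likewise returns raw audio bytes. A brief sketch with a placeholder model:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
audio = client.text_to_speech(
    "Hello world, this is a test.",
    model="microsoft/speecht5_tts",  # placeholder model
)
with open("speech.flac", "wb") as f:
    f.write(audio)  # raw audio bytes; the container format depends on the model/provider
```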
@@ -2861,9 +2939,9 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
-        ignore_labels: Optional[
+        ignore_labels: Optional[list[str]] = None,
         stride: Optional[int] = None,
-    ) ->
+    ) -> list[TokenClassificationOutputElement]:
         """
         Perform token classification on the given text.
         Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2877,18 +2955,18 @@ class AsyncInferenceClient:
                 Defaults to None.
             aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
                 The strategy used to fuse tokens based on model predictions
-            ignore_labels (`
+            ignore_labels (`list[str`, *optional*):
                 A list of labels to ignore
             stride (`int`, *optional*):
                 The number of overlapping tokens between chunks when splitting the input text.

         Returns:
-            `
+            `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
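A sketch of `token_classification`, which returns `list[TokenClassificationOutputElement]` per the updated annotation (the model ID is a placeholder):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
entities = client.token_classification(
    "My name is Sarah Jessica Parker but you can call me Jessica",
    model="dslim/bert-base-NER",  # placeholder model
)
for entity in entities:
    # Each TokenClassificationOutputElement carries entity group, score, word and offsets
    print(entity.entity_group, entity.word, round(entity.score, 3), entity.start, entity.end)
```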
@@ -2940,7 +3018,7 @@ class AsyncInferenceClient:
         tgt_lang: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         truncation: Optional["TranslationTruncationStrategy"] = None,
-        generate_parameters: Optional[
+        generate_parameters: Optional[dict[str, Any]] = None,
     ) -> TranslationOutput:
         """
         Convert text from one language to another.
@@ -2965,7 +3043,7 @@ class AsyncInferenceClient:
                 Whether to clean up the potential extra spaces in the text output.
             truncation (`"TranslationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
-            generate_parameters (`
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.

         Returns:
@@ -2974,7 +3052,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If only one of the `src_lang` and `tgt_lang` arguments are provided.
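A sketch of `translation`; note that `src_lang` and `tgt_lang` must be passed together, per the `ValueError` documented above (model ID and language codes are illustrative):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
result = client.translation(
    "My name is Wolfgang and I live in Berlin.",
    model="facebook/nllb-200-distilled-600M",  # placeholder model
    src_lang="eng_Latn",  # must be provided together with tgt_lang, or not at all
    tgt_lang="fra_Latn",
)
print(result.translation_text)
```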
@@ -3028,7 +3106,7 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         top_k: Optional[int] = None,
-    ) ->
+    ) -> list[VisualQuestionAnsweringOutputElement]:
         """
         Answering open-ended questions based on an image.

@@ -3045,12 +3123,12 @@ class AsyncInferenceClient:
                 The number of answers to return (will be chosen by order of likelihood). Note that we return less than
                 topk answers if there are not enough options available within the context.
         Returns:
-            `
+            `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
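A sketch of `visual_question_answering`, which now advertises `list[VisualQuestionAnsweringOutputElement]` as its return type (image URL and model ID are illustrative):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
answers = client.visual_question_answering(
    image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",  # illustrative URL
    question="What animal is this?",
    model="dandelin/vilt-b32-finetuned-vqa",  # placeholder model
)
for answer in answers:
    print(answer.answer, round(answer.score, 3))
```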
@@ -3084,21 +3162,21 @@ class AsyncInferenceClient:
     async def zero_shot_classification(
         self,
         text: str,
-        candidate_labels:
+        candidate_labels: list[str],
         *,
         multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
-    ) ->
+    ) -> list[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.

         Args:
             text (`str`):
                 The input text to classify.
-            candidate_labels (`
+            candidate_labels (`list[str]`):
                 The set of possible class labels to classify the text into.
-            labels (`
+            labels (`list[str]`, *optional*):
                 (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
             multi_label (`bool`, *optional*):
                 Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3113,12 +3191,12 @@ class AsyncInferenceClient:


         Returns:
-            `
+            `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example with `multi_label=False`:
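A sketch of `zero_shot_classification` with the `candidate_labels: list[str]` signature shown above (placeholder model):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
results = client.zero_shot_classification(
    "I really enjoyed the new movie, the plot was fantastic!",
    candidate_labels=["positive", "negative", "neutral"],
    model="facebook/bart-large-mnli",  # placeholder model
)
for item in results:
    print(item.label, round(item.score, 3))  # scores sum to 1 when multi_label=False
```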
@@ -3192,22 +3270,22 @@ class AsyncInferenceClient:
     async def zero_shot_image_classification(
         self,
         image: ContentT,
-        candidate_labels:
+        candidate_labels: list[str],
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
         # deprecated argument
-        labels:
-    ) ->
+        labels: list[str] = None,  # type: ignore
+    ) -> list[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.

         Args:
             image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
                 The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
-            candidate_labels (`
+            candidate_labels (`list[str]`):
                 The candidate labels for this image
-            labels (`
+            labels (`list[str]`, *optional*):
                 (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3217,12 +3295,12 @@ class AsyncInferenceClient:
                 replacing the placeholder with the candidate labels.

         Returns:
-            `
+            `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
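A sketch of `zero_shot_image_classification` with the same `candidate_labels: list[str]` convention (image URL and model ID are illustrative):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
results = client.zero_shot_image_classification(
    "https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg",  # illustrative image URL
    candidate_labels=["cat", "dog", "bird"],
    model="openai/clip-vit-base-patch32",  # placeholder model
)
for item in results:
    print(item.label, round(item.score, 3))
```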
@@ -3257,144 +3335,7 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    async def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
-        in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-        ```py
-        # Must be run in an async context
-        >>> from huggingface_hub import AsyncInferenceClient
-        >>> client = AsyncInferenceClient()
-
-        # Discover zero-shot-classification models currently deployed
-        >>> models = await client.list_deployed_models()
-        >>> models["zero-shot-classification"]
-        ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-        # List from only 1 framework
-        >>> await client.list_deployed_models("text-generation-inference")
-        {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Model running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
-    def _get_client_session(self, headers: Optional[Dict] = None) -> "ClientSession":
-        aiohttp = _import_aiohttp()
-        client_headers = self.headers.copy()
-        if headers is not None:
-            client_headers.update(headers)
-
-        # Return a new aiohttp ClientSession with correct settings.
-        session = aiohttp.ClientSession(
-            headers=client_headers,
-            cookies=self.cookies,
-            timeout=aiohttp.ClientTimeout(self.timeout),
-            trust_env=self.trust_env,
-        )
-
-        # Keep track of sessions to close them later
-        self._sessions[session] = set()
-
-        # Override the `._request` method to register responses to be closed
-        session._wrapped_request = session._request
-
-        async def _request(method, url, **kwargs):
-            response = await session._wrapped_request(method, url, **kwargs)
-            self._sessions[session].add(response)
-            return response
-
-        session._request = _request
-
-        # Override the 'close' method to
-        # 1. close ongoing responses
-        # 2. deregister the session when closed
-        session._close = session.close
-
-        async def close_session():
-            for response in self._sessions[session]:
-                response.close()
-            await session._close()
-            self._sessions.pop(session, None)
-
-        session.close = close_session
-        return session
-
-    async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+    async def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
         """
         Get information about the deployed endpoint.

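The removed deprecation message points to `HfApi.list_models(..., inference_provider='...')` as the replacement for `list_deployed_models`. A sketch of that replacement; the provider name and filters are illustrative:

```py
from huggingface_hub import HfApi

api = HfApi()
# List warm text-generation models served by a given provider
models = api.list_models(
    inference_provider="hf-inference",  # illustrative provider name
    pipeline_tag="text-generation",
    limit=10,
)
for model in models:
    print(model.id)
```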
@@ -3407,7 +3348,7 @@ class AsyncInferenceClient:
             Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

         Returns:
-            `
+            `dict[str, Any]`: Information about the endpoint.

         Example:
         ```py
@@ -3449,17 +3390,16 @@ class AsyncInferenceClient:
         else:
             url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

-
-
-
-
+        client = await self._get_async_client()
+        response = await client.get(url, headers=build_hf_headers(token=self.token))
+        hf_raise_for_status(response)
+        return response.json()

     async def health_check(self, model: Optional[str] = None) -> bool:
         """
         Check the health of the deployed endpoint.

         Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

         Args:
             model (`str`, *optional*):
@@ -3484,77 +3424,12 @@ class AsyncInferenceClient:
         if model is None:
             raise ValueError("Model id not provided.")
         if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
         url = model.rstrip("/") + "/health"

-
-
-
-
-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    async def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-        ```py
-        # Must be run in an async context
-        >>> from huggingface_hub import AsyncInferenceClient
-        >>> client = AsyncInferenceClient()
-        >>> await client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
-            response = await client.get(url, proxy=self.proxies)
-            response.raise_for_status()
-            response_data = await response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
+        client = await self._get_async_client()
+        response = await client.get(url, headers=build_hf_headers(token=self.token))
+        return response.status_code == 200

     @property
     def chat(self) -> "ProxyClientChat":
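The removed `get_model_status` helper directed users to `HfApi.model_info`, and `health_check` now only accepts an Inference Endpoint URL. A sketch of both replacements; the `expand=["inference"]` field, the model ID, and the endpoint URL are assumptions for illustration:

```py
from huggingface_hub import HfApi, InferenceClient

# Model status via the Hub API (replacement suggested by the removed deprecation message)
api = HfApi()
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])  # assumed expand field
print(info.inference)  # e.g. "warm"

# health_check() now only targets a dedicated Inference Endpoint URL
client = InferenceClient()
print(client.health_check(model="https://my-endpoint.endpoints.huggingface.cloud"))  # placeholder URL
```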
|