huggingface-hub 0.34.4__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +46 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +167 -10
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +13 -14
- huggingface_hub/_upload_large_folder.py +15 -15
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/_cli_utils.py +2 -2
- huggingface_hub/cli/auth.py +5 -6
- huggingface_hub/cli/cache.py +14 -20
- huggingface_hub/cli/download.py +4 -4
- huggingface_hub/cli/jobs.py +560 -11
- huggingface_hub/cli/lfs.py +4 -4
- huggingface_hub/cli/repo.py +7 -7
- huggingface_hub/cli/repo_files.py +2 -2
- huggingface_hub/cli/upload.py +4 -4
- huggingface_hub/cli/upload_large_folder.py +3 -3
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +13 -13
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +5 -6
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -19
- huggingface_hub/errors.py +42 -29
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +153 -252
- huggingface_hub/hf_api.py +815 -600
- huggingface_hub/hf_file_system.py +98 -62
- huggingface_hub/hub_mixin.py +37 -57
- huggingface_hub/inference/_client.py +177 -325
- huggingface_hub/inference/_common.py +110 -124
- huggingface_hub/inference/_generated/_async_client.py +226 -432
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +2 -3
- huggingface_hub/inference/_mcp/mcp_client.py +58 -30
- huggingface_hub/inference/_mcp/types.py +10 -7
- huggingface_hub/inference/_mcp/utils.py +11 -7
- huggingface_hub/inference/_providers/__init__.py +2 -2
- huggingface_hub/inference/_providers/_common.py +49 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +28 -20
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +20 -31
- huggingface_hub/repocard.py +18 -18
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +4 -4
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +366 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +15 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +32 -11
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
- huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.34.4.dist-info/RECORD +0 -166
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_generated/_async_client.py

@@ -21,16 +21,19 @@
 import asyncio
 import base64
 import logging
+import os
 import re
 import warnings
-from
+from contextlib import AsyncExitStack
+from typing import TYPE_CHECKING, Any, AsyncIterable, Literal, Optional, Union, overload
+
+import httpx

 from huggingface_hub import constants
-from huggingface_hub.errors import InferenceTimeoutError
+from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
 TASKS_EXPECTING_IMAGES,
 ContentT,
-ModelStatus,
 RequestParameters,
 _async_stream_chat_completion_response,
 _async_stream_text_generation_response,
@@ -41,7 +44,6 @@ from huggingface_hub.inference._common import (
 _bytes_to_list,
 _get_unsupported_text_generation_kwargs,
 _import_numpy,
-_open_as_binary,
 _set_unsupported_text_generation_kwargs,
 raise_text_generation_error,
 )
@@ -88,16 +90,19 @@ from huggingface_hub.inference._generated.types import (
 ZeroShotImageClassificationOutputElement,
 )
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
-from huggingface_hub.utils import
+from huggingface_hub.utils import (
+build_hf_headers,
+get_async_session,
+hf_raise_for_status,
+validate_hf_hub_args,
+)
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method

-from .._common import _async_yield_from
+from .._common import _async_yield_from


 if TYPE_CHECKING:
 import numpy as np
-from aiohttp import ClientResponse, ClientSession
 from PIL.Image import Image

 logger = logging.getLogger(__name__)
@@ -130,18 +135,14 @@ class AsyncInferenceClient:
 arguments are mutually exclusive and have the exact same behavior.
 timeout (`float`, `optional`):
 The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-headers (`
+headers (`dict[str, str]`, `optional`):
 Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
 Values in this dictionary will override the default values.
 bill_to (`str`, `optional`):
 The billing account to use for the requests. By default the requests are billed on the user's account.
 Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-cookies (`
+cookies (`dict[str, str]`, `optional`):
 Additional cookies to send to the server.
-trust_env ('bool', 'optional'):
-Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
-proxies (`Any`, `optional`):
-Proxies to use for the request.
 base_url (`str`, `optional`):
 Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
 follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -150,6 +151,7 @@ class AsyncInferenceClient:
 follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
 """

+@validate_hf_hub_args
 def __init__(
 self,
 model: Optional[str] = None,
@@ -157,10 +159,8 @@ class AsyncInferenceClient:
 provider: Optional[PROVIDER_OR_POLICY_T] = None,
 token: Optional[str] = None,
 timeout: Optional[float] = None,
-headers: Optional[
-cookies: Optional[
-trust_env: bool = False,
-proxies: Optional[Any] = None,
+headers: Optional[dict[str, str]] = None,
+cookies: Optional[dict[str, str]] = None,
 bill_to: Optional[str] = None,
 # OpenAI compatibility
 base_url: Optional[str] = None,
@@ -222,15 +222,36 @@ class AsyncInferenceClient:

 self.cookies = cookies
 self.timeout = timeout
-self.trust_env = trust_env
-self.proxies = proxies

-
-self.
+self.exit_stack = AsyncExitStack()
+self._async_client: Optional[httpx.AsyncClient] = None

 def __repr__(self):
 return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

+async def __aenter__(self):
+return self
+
+async def __aexit__(self, exc_type, exc_value, traceback):
+await self.close()
+
+async def close(self):
+"""Close the client.
+
+This method is automatically called when using the client as a context manager.
+"""
+await self.exit_stack.aclose()
+
+async def _get_async_client(self):
+"""Get a unique async client for this AsyncInferenceClient instance.
+
+Returns the same client instance on subsequent calls, ensuring proper
+connection reuse and resource management through the exit stack.
+"""
+if self._async_client is None:
+self._async_client = await self.exit_stack.enter_async_context(get_async_session())
+return self._async_client
+
 @overload
 async def _inner_post( # type: ignore[misc]
 self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
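The hunk above replaces the old per-request aiohttp session bookkeeping with a single client owned by the instance and an `AsyncExitStack` drained by `close()`. A minimal usage sketch under the new API (the model ID and prompt are illustrative placeholders, not taken from this diff):

    import asyncio
    from huggingface_hub import AsyncInferenceClient

    async def main() -> None:
        # Leaving the `async with` block calls close(), which releases the
        # underlying httpx.AsyncClient and any streams still held open.
        async with AsyncInferenceClient(model="HuggingFaceH4/zephyr-7b-beta") as client:
            text = await client.text_generation("The huggingface_hub library is")
            print(text)

    asyncio.run(main())

Outside a context manager, the same cleanup can be done by calling `await client.close()` explicitly.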
@@ -239,83 +260,59 @@ class AsyncInferenceClient:
 @overload
 async def _inner_post( # type: ignore[misc]
 self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-) -> AsyncIterable[
+) -> AsyncIterable[str]: ...

 @overload
 async def _inner_post(
 self, request_parameters: RequestParameters, *, stream: bool = False
-) -> Union[bytes, AsyncIterable[
+) -> Union[bytes, AsyncIterable[str]]: ...

 async def _inner_post(
 self, request_parameters: RequestParameters, *, stream: bool = False
-) -> Union[bytes, AsyncIterable[
+) -> Union[bytes, AsyncIterable[str]]:
 """Make a request to the inference server."""

-aiohttp = _import_aiohttp()
-
 # TODO: this should be handled in provider helpers directly
 if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
 request_parameters.headers["Accept"] = "image/png"

-
-
-
-
-
-
-
-
+try:
+client = await self._get_async_client()
+if stream:
+response = await self.exit_stack.enter_async_context(
+client.stream(
+"POST",
+request_parameters.url,
+json=request_parameters.json,
+data=request_parameters.data,
+headers=request_parameters.headers,
+cookies=self.cookies,
+timeout=self.timeout,
+)
 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-raise
-
-async def __aenter__(self):
-return self
-
-async def __aexit__(self, exc_type, exc_value, traceback):
-await self.close()
-
-def __del__(self):
-if len(self._sessions) > 0:
-warnings.warn(
-"Deleting 'AsyncInferenceClient' client but some sessions are still open. "
-"This can happen if you've stopped streaming data from the server before the stream was complete. "
-"To close the client properly, you must call `await client.close()` "
-"or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
-)
-
-async def close(self):
-"""Close all open sessions.
-
-By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
-are streaming data from the server and you stop before the stream is complete, you must call this method to
-close the session properly.
-
-Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
-"""
-await asyncio.gather(*[session.close() for session in self._sessions.keys()])
+hf_raise_for_status(response)
+return _async_yield_from(client, response)
+else:
+response = await client.post(
+request_parameters.url,
+json=request_parameters.json,
+data=request_parameters.data,
+headers=request_parameters.headers,
+cookies=self.cookies,
+timeout=self.timeout,
+)
+hf_raise_for_status(response)
+return response.content
+except asyncio.TimeoutError as error:
+# Convert any `TimeoutError` to a `InferenceTimeoutError`
+raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
+except HfHubHTTPError as error:
+if error.response.status_code == 422 and request_parameters.task != "unknown":
+msg = str(error.args[0])
+if len(error.response.text) > 0:
+msg += f"{os.linesep}{error.response.text}{os.linesep}"
+error.args = (msg,) + error.args[1:]
+raise

 async def audio_classification(
 self,
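For reference, the streaming branch above follows the usual httpx pattern: `client.stream("POST", ...)` returns an async context manager, and `_inner_post` parks it on the instance's `AsyncExitStack` so the body stays readable after the method returns. A standalone sketch of that pattern, with a placeholder URL and payload rather than the client's actual request plumbing:

    from contextlib import AsyncExitStack
    import httpx

    async def stream_post(url: str, payload: dict) -> None:
        async with AsyncExitStack() as stack:
            client = await stack.enter_async_context(httpx.AsyncClient())
            # The streamed response is entered on the same stack, so client and
            # response are both closed together when the stack unwinds.
            response = await stack.enter_async_context(client.stream("POST", url, json=payload))
            response.raise_for_status()
            async for line in response.aiter_lines():
                print(line)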
@@ -324,7 +321,7 @@ class AsyncInferenceClient:
 model: Optional[str] = None,
 top_k: Optional[int] = None,
 function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-) ->
+) -> list[AudioClassificationOutputElement]:
 """
 Perform audio classification on the provided audio content.

@@ -342,12 +339,12 @@ class AsyncInferenceClient:
 The function to apply to the model outputs in order to retrieve the scores.

 Returns:
-`
+`list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.

 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -380,7 +377,7 @@ class AsyncInferenceClient:
 audio: ContentT,
 *,
 model: Optional[str] = None,
-) ->
+) -> list[AudioToAudioOutputElement]:
 """
 Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).

@@ -394,12 +391,12 @@ class AsyncInferenceClient:
 audio_to_audio will be used.

 Returns:
-`
+`list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.

 Raises:
 `InferenceTimeoutError`:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -433,7 +430,7 @@ class AsyncInferenceClient:
 audio: ContentT,
 *,
 model: Optional[str] = None,
-extra_body: Optional[
+extra_body: Optional[dict] = None,
 ) -> AutomaticSpeechRecognitionOutput:
 """
 Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -444,7 +441,7 @@ class AsyncInferenceClient:
 model (`str`, *optional*):
 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-extra_body (`
+extra_body (`dict`, *optional*):
 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
 for supported parameters.
 Returns:
@@ -453,7 +450,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -480,105 +477,105 @@ class AsyncInferenceClient:
 @overload
 async def chat_completion( # type: ignore
 self,
-messages:
+messages: list[Union[dict, ChatCompletionInputMessage]],
 *,
 model: Optional[str] = None,
 stream: Literal[False] = False,
 frequency_penalty: Optional[float] = None,
-logit_bias: Optional[
+logit_bias: Optional[list[float]] = None,
 logprobs: Optional[bool] = None,
 max_tokens: Optional[int] = None,
 n: Optional[int] = None,
 presence_penalty: Optional[float] = None,
 response_format: Optional[ChatCompletionInputGrammarType] = None,
 seed: Optional[int] = None,
-stop: Optional[
+stop: Optional[list[str]] = None,
 stream_options: Optional[ChatCompletionInputStreamOptions] = None,
 temperature: Optional[float] = None,
 tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
 tool_prompt: Optional[str] = None,
-tools: Optional[
+tools: Optional[list[ChatCompletionInputTool]] = None,
 top_logprobs: Optional[int] = None,
 top_p: Optional[float] = None,
-extra_body: Optional[
+extra_body: Optional[dict] = None,
 ) -> ChatCompletionOutput: ...

 @overload
 async def chat_completion( # type: ignore
 self,
-messages:
+messages: list[Union[dict, ChatCompletionInputMessage]],
 *,
 model: Optional[str] = None,
 stream: Literal[True] = True,
 frequency_penalty: Optional[float] = None,
-logit_bias: Optional[
+logit_bias: Optional[list[float]] = None,
 logprobs: Optional[bool] = None,
 max_tokens: Optional[int] = None,
 n: Optional[int] = None,
 presence_penalty: Optional[float] = None,
 response_format: Optional[ChatCompletionInputGrammarType] = None,
 seed: Optional[int] = None,
-stop: Optional[
+stop: Optional[list[str]] = None,
 stream_options: Optional[ChatCompletionInputStreamOptions] = None,
 temperature: Optional[float] = None,
 tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
 tool_prompt: Optional[str] = None,
-tools: Optional[
+tools: Optional[list[ChatCompletionInputTool]] = None,
 top_logprobs: Optional[int] = None,
 top_p: Optional[float] = None,
-extra_body: Optional[
+extra_body: Optional[dict] = None,
 ) -> AsyncIterable[ChatCompletionStreamOutput]: ...

 @overload
 async def chat_completion(
 self,
-messages:
+messages: list[Union[dict, ChatCompletionInputMessage]],
 *,
 model: Optional[str] = None,
 stream: bool = False,
 frequency_penalty: Optional[float] = None,
-logit_bias: Optional[
+logit_bias: Optional[list[float]] = None,
 logprobs: Optional[bool] = None,
 max_tokens: Optional[int] = None,
 n: Optional[int] = None,
 presence_penalty: Optional[float] = None,
 response_format: Optional[ChatCompletionInputGrammarType] = None,
 seed: Optional[int] = None,
-stop: Optional[
+stop: Optional[list[str]] = None,
 stream_options: Optional[ChatCompletionInputStreamOptions] = None,
 temperature: Optional[float] = None,
 tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
 tool_prompt: Optional[str] = None,
-tools: Optional[
+tools: Optional[list[ChatCompletionInputTool]] = None,
 top_logprobs: Optional[int] = None,
 top_p: Optional[float] = None,
-extra_body: Optional[
+extra_body: Optional[dict] = None,
 ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...

 async def chat_completion(
 self,
-messages:
+messages: list[Union[dict, ChatCompletionInputMessage]],
 *,
 model: Optional[str] = None,
 stream: bool = False,
 # Parameters from ChatCompletionInput (handled manually)
 frequency_penalty: Optional[float] = None,
-logit_bias: Optional[
+logit_bias: Optional[list[float]] = None,
 logprobs: Optional[bool] = None,
 max_tokens: Optional[int] = None,
 n: Optional[int] = None,
 presence_penalty: Optional[float] = None,
 response_format: Optional[ChatCompletionInputGrammarType] = None,
 seed: Optional[int] = None,
-stop: Optional[
+stop: Optional[list[str]] = None,
 stream_options: Optional[ChatCompletionInputStreamOptions] = None,
 temperature: Optional[float] = None,
 tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
 tool_prompt: Optional[str] = None,
-tools: Optional[
+tools: Optional[list[ChatCompletionInputTool]] = None,
 top_logprobs: Optional[int] = None,
 top_p: Optional[float] = None,
-extra_body: Optional[
+extra_body: Optional[dict] = None,
 ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
 """
 A method for completing conversations using a specified language model.
@@ -608,7 +605,7 @@ class AsyncInferenceClient:
 frequency_penalty (`float`, *optional*):
 Penalizes new tokens based on their existing frequency
 in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-logit_bias (`
+logit_bias (`list[float]`, *optional*):
 Adjusts the likelihood of specific tokens appearing in the generated output.
 logprobs (`bool`, *optional*):
 Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -624,7 +621,7 @@ class AsyncInferenceClient:
 Grammar constraints. Can be either a JSONSchema or a regex.
 seed (Optional[`int`], *optional*):
 Seed for reproducible control flow. Defaults to None.
-stop (`
+stop (`list[str]`, *optional*):
 Up to four strings which trigger the end of the response.
 Defaults to None.
 stream (`bool`, *optional*):
@@ -648,7 +645,7 @@ class AsyncInferenceClient:
 tools (List of [`ChatCompletionInputTool`], *optional*):
 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
 provide a list of functions the model may generate JSON inputs for.
-extra_body (`
+extra_body (`dict`, *optional*):
 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
 for supported parameters.
 Returns:
@@ -660,7 +657,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -980,8 +977,8 @@ class AsyncInferenceClient:
 max_question_len: Optional[int] = None,
 max_seq_len: Optional[int] = None,
 top_k: Optional[int] = None,
-word_boxes: Optional[
-) ->
+word_boxes: Optional[list[Union[list[float], str]]] = None,
+) -> list[DocumentQuestionAnsweringOutputElement]:
 """
 Answer questions on document images.

@@ -1011,16 +1008,16 @@ class AsyncInferenceClient:
 top_k (`int`, *optional*):
 The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
 answers if there are not enough options available within the context.
-word_boxes (`
+word_boxes (`list[Union[list[float], str`, *optional*):
 A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
 step and use the provided bounding boxes instead.
 Returns:
-`
+`list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.


@@ -1035,7 +1032,7 @@ class AsyncInferenceClient:
 """
 model_id = model or self.model
 provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-inputs:
+inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
 request_parameters = provider_helper.prepare_request(
 inputs=inputs,
 parameters={
@@ -1096,7 +1093,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1134,9 +1131,9 @@ class AsyncInferenceClient:
 text: str,
 *,
 model: Optional[str] = None,
-targets: Optional[
+targets: Optional[list[str]] = None,
 top_k: Optional[int] = None,
-) ->
+) -> list[FillMaskOutputElement]:
 """
 Fill in a hole with a missing word (token to be precise).

@@ -1146,20 +1143,20 @@ class AsyncInferenceClient:
 model (`str`, *optional*):
 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-targets (`
+targets (`list[str`, *optional*):
 When passed, the model will limit the scores to the passed targets instead of looking up in the whole
 vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
 resulting token will be used (with a warning, and that might be slower).
 top_k (`int`, *optional*):
 When passed, overrides the number of predictions to return.
 Returns:
-`
+`list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
 probability, token reference, and completed text.

 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1193,7 +1190,7 @@ class AsyncInferenceClient:
 model: Optional[str] = None,
 function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
 top_k: Optional[int] = None,
-) ->
+) -> list[ImageClassificationOutputElement]:
 """
 Perform image classification on the given image using the specified model.

@@ -1208,12 +1205,12 @@ class AsyncInferenceClient:
 top_k (`int`, *optional*):
 When specified, limits the output to the top K most probable classes.
 Returns:
-`
+`list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.

 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1246,7 +1243,7 @@ class AsyncInferenceClient:
 overlap_mask_area_threshold: Optional[float] = None,
 subtask: Optional["ImageSegmentationSubtask"] = None,
 threshold: Optional[float] = None,
-) ->
+) -> list[ImageSegmentationOutputElement]:
 """
 Perform image segmentation on the given image using the specified model.

@@ -1271,12 +1268,12 @@ class AsyncInferenceClient:
 threshold (`float`, *optional*):
 Probability threshold to filter out predicted masks.
 Returns:
-`
+`list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.

 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1354,7 +1351,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1486,7 +1483,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1510,12 +1507,12 @@ class AsyncInferenceClient:
 api_key=self.token,
 )
 response = await self._inner_post(request_parameters)
-
-return
+output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+return output_list[0]

 async def object_detection(
 self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-) ->
+) -> list[ObjectDetectionOutputElement]:
 """
 Perform object detection on the given image using the specified model.

@@ -1534,12 +1531,12 @@ class AsyncInferenceClient:
 threshold (`float`, *optional*):
 The probability necessary to make a prediction.
 Returns:
-`
+`list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.

 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.
 `ValueError`:
 If the request output is not a List.
@@ -1578,7 +1575,7 @@ class AsyncInferenceClient:
 max_question_len: Optional[int] = None,
 max_seq_len: Optional[int] = None,
 top_k: Optional[int] = None,
-) -> Union[QuestionAnsweringOutputElement,
+) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
 """
 Retrieve the answer to a question from a given text.

@@ -1610,13 +1607,13 @@ class AsyncInferenceClient:
 topk answers if there are not enough options available within the context.

 Returns:
-Union[`QuestionAnsweringOutputElement`,
+Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
 When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
 When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1651,15 +1648,15 @@ class AsyncInferenceClient:
 return output

 async def sentence_similarity(
-self, sentence: str, other_sentences:
-) ->
+self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+) -> list[float]:
 """
 Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.

 Args:
 sentence (`str`):
 The main sentence to compare to others.
-other_sentences (`
+other_sentences (`list[str]`):
 The list of sentences to compare to.
 model (`str`, *optional*):
 The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1667,12 +1664,12 @@ class AsyncInferenceClient:
 Defaults to None.

 Returns:
-`
+`list[float]`: The embedding representing the input text.

 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1710,7 +1707,7 @@ class AsyncInferenceClient:
 *,
 model: Optional[str] = None,
 clean_up_tokenization_spaces: Optional[bool] = None,
-generate_parameters: Optional[
+generate_parameters: Optional[dict[str, Any]] = None,
 truncation: Optional["SummarizationTruncationStrategy"] = None,
 ) -> SummarizationOutput:
 """
@@ -1724,7 +1721,7 @@ class AsyncInferenceClient:
 Inference Endpoint. If not provided, the default recommended model for summarization will be used.
 clean_up_tokenization_spaces (`bool`, *optional*):
 Whether to clean up the potential extra spaces in the text output.
-generate_parameters (`
+generate_parameters (`dict[str, Any]`, *optional*):
 Additional parametrization of the text generation algorithm.
 truncation (`"SummarizationTruncationStrategy"`, *optional*):
 The truncation strategy to use.
@@ -1734,7 +1731,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1765,7 +1762,7 @@ class AsyncInferenceClient:

 async def table_question_answering(
 self,
-table:
+table: dict[str, Any],
 query: str,
 *,
 model: Optional[str] = None,
@@ -1800,7 +1797,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1826,12 +1823,12 @@ class AsyncInferenceClient:
 response = await self._inner_post(request_parameters)
 return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)

-async def tabular_classification(self, table:
+async def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
 """
 Classifying a target category (a group) based on a set of attributes.

 Args:
-table (`
+table (`dict[str, Any]`):
 Set of attributes to classify.
 model (`str`, *optional*):
 The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1844,7 +1841,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1882,12 +1879,12 @@ class AsyncInferenceClient:
 response = await self._inner_post(request_parameters)
 return _bytes_to_list(response)

-async def tabular_regression(self, table:
+async def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
 """
 Predicting a numerical target value given a set of attributes/features in a table.

 Args:
-table (`
+table (`dict[str, Any]`):
 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
 model (`str`, *optional*):
 The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1900,7 +1897,7 @@ class AsyncInferenceClient:
 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -1940,7 +1937,7 @@ class AsyncInferenceClient:
 model: Optional[str] = None,
 top_k: Optional[int] = None,
 function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-) ->
+) -> list[TextClassificationOutputElement]:
 """
 Perform text classification (e.g. sentiment-analysis) on the given text.

@@ -1957,12 +1954,12 @@ class AsyncInferenceClient:
 The function to apply to the model outputs in order to retrieve the scores.

 Returns:
-`
+`list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.

 Raises:
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -2011,8 +2008,8 @@ class AsyncInferenceClient:
 repetition_penalty: Optional[float] = None,
 return_full_text: Optional[bool] = None,
 seed: Optional[int] = None,
-stop: Optional[
-stop_sequences: Optional[
+stop: Optional[list[str]] = None,
+stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
 temperature: Optional[float] = None,
 top_k: Optional[int] = None,
 top_n_tokens: Optional[int] = None,
@@ -2041,8 +2038,8 @@ class AsyncInferenceClient:
 repetition_penalty: Optional[float] = None,
 return_full_text: Optional[bool] = None,
 seed: Optional[int] = None,
-stop: Optional[
-stop_sequences: Optional[
+stop: Optional[list[str]] = None,
+stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
 temperature: Optional[float] = None,
 top_k: Optional[int] = None,
 top_n_tokens: Optional[int] = None,
@@ -2071,8 +2068,8 @@ class AsyncInferenceClient:
 repetition_penalty: Optional[float] = None,
 return_full_text: Optional[bool] = None, # Manual default value
 seed: Optional[int] = None,
-stop: Optional[
-stop_sequences: Optional[
+stop: Optional[list[str]] = None,
+stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
 temperature: Optional[float] = None,
 top_k: Optional[int] = None,
 top_n_tokens: Optional[int] = None,
@@ -2101,8 +2098,8 @@ class AsyncInferenceClient:
 repetition_penalty: Optional[float] = None,
 return_full_text: Optional[bool] = None,
 seed: Optional[int] = None,
-stop: Optional[
-stop_sequences: Optional[
+stop: Optional[list[str]] = None,
+stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
 temperature: Optional[float] = None,
 top_k: Optional[int] = None,
 top_n_tokens: Optional[int] = None,
@@ -2131,8 +2128,8 @@ class AsyncInferenceClient:
 repetition_penalty: Optional[float] = None,
 return_full_text: Optional[bool] = None,
 seed: Optional[int] = None,
-stop: Optional[
-stop_sequences: Optional[
+stop: Optional[list[str]] = None,
+stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
 temperature: Optional[float] = None,
 top_k: Optional[int] = None,
 top_n_tokens: Optional[int] = None,
@@ -2160,8 +2157,8 @@ class AsyncInferenceClient:
 repetition_penalty: Optional[float] = None,
 return_full_text: Optional[bool] = None,
 seed: Optional[int] = None,
-stop: Optional[
-stop_sequences: Optional[
+stop: Optional[list[str]] = None,
+stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
 temperature: Optional[float] = None,
 top_k: Optional[int] = None,
 top_n_tokens: Optional[int] = None,
@@ -2217,9 +2214,9 @@ class AsyncInferenceClient:
 Whether to prepend the prompt to the generated text
 seed (`int`, *optional*):
 Random sampling seed
-stop (`
+stop (`list[str]`, *optional*):
 Stop generating tokens if a member of `stop` is generated.
-stop_sequences (`
+stop_sequences (`list[str]`, *optional*):
 Deprecated argument. Use `stop` instead.
 temperature (`float`, *optional*):
 The value used to module the logits distribution.
@@ -2240,10 +2237,10 @@ class AsyncInferenceClient:
 Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)

 Returns:
-`Union[str, TextGenerationOutput,
+`Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
 Generated text returned from the server:
 - if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
-- if `stream=True` and `details=False`, the generated text is returned token by token as a `
+- if `stream=True` and `details=False`, the generated text is returned token by token as a `AsyncIterable[str]`
 - if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
 - if `details=True` and `stream=True`, the generated text is returned token by token as a iterable of [`~huggingface_hub.TextGenerationStreamOutput`]

@@ -2252,7 +2249,7 @@ class AsyncInferenceClient:
 If input values are not valid. No HTTP call is made to the server.
 [`InferenceTimeoutError`]:
 If the model is unavailable or the request times out.
-`
+[`HfHubHTTPError`]:
 If the request fails with an HTTP error status code other than HTTP 503.

 Example:
@@ -2442,9 +2439,9 @@ class AsyncInferenceClient:
 # Handle errors separately for more precise error messages
 try:
 bytes_output = await self._inner_post(request_parameters, stream=stream or False)
-except
-match = MODEL_KWARGS_NOT_USED_REGEX.search(e
-if e
+except HfHubHTTPError as e:
+match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
+if isinstance(e, BadRequestError) and match:
 unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
 _set_unsupported_text_generation_kwargs(model, unused_params)
 return await self.text_generation( # type: ignore
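With aiohttp gone, error handling in `text_generation` is now expressed in terms of `huggingface_hub.errors`. A sketch of how calling code might map those exceptions; the handling policy shown here is illustrative, not part of the diff:

    from huggingface_hub import AsyncInferenceClient
    from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError

    async def generate(prompt: str) -> str:
        async with AsyncInferenceClient() as client:
            try:
                return await client.text_generation(prompt)
            except InferenceTimeoutError:
                raise  # model unavailable or request timed out
            except BadRequestError:
                raise  # HTTP 400, e.g. malformed payload or unsupported kwargs
            except HfHubHTTPError as err:
                # any other HTTP error status reported by the server
                raise RuntimeError(f"inference request failed: {err}") from err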
@@ -2497,7 +2494,7 @@ class AsyncInferenceClient:
|
|
|
2497
2494
|
model: Optional[str] = None,
|
|
2498
2495
|
scheduler: Optional[str] = None,
|
|
2499
2496
|
seed: Optional[int] = None,
|
|
2500
|
-
extra_body: Optional[
|
|
2497
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2501
2498
|
) -> "Image":
|
|
2502
2499
|
"""
|
|
2503
2500
|
Generate an image based on a given text using a specified model.
|
|
@@ -2535,7 +2532,7 @@ class AsyncInferenceClient:
|
|
|
2535
2532
|
Override the scheduler with a compatible one.
|
|
2536
2533
|
seed (`int`, *optional*):
|
|
2537
2534
|
Seed for the random number generator.
|
|
2538
|
-
extra_body (`
|
|
2535
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2539
2536
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2540
2537
|
for supported parameters.
|
|
2541
2538
|
|
|
@@ -2545,7 +2542,7 @@ class AsyncInferenceClient:
|
|
|
2545
2542
|
Raises:
|
|
2546
2543
|
[`InferenceTimeoutError`]:
|
|
2547
2544
|
If the model is unavailable or the request times out.
|
|
2548
|
-
`
|
|
2545
|
+
[`HfHubHTTPError`]:
|
|
2549
2546
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2550
2547
|
|
|
2551
2548
|
Example:
|
|
@@ -2635,11 +2632,11 @@ class AsyncInferenceClient:
|
|
|
2635
2632
|
*,
|
|
2636
2633
|
model: Optional[str] = None,
|
|
2637
2634
|
guidance_scale: Optional[float] = None,
|
|
2638
|
-
negative_prompt: Optional[
|
|
2635
|
+
negative_prompt: Optional[list[str]] = None,
|
|
2639
2636
|
num_frames: Optional[float] = None,
|
|
2640
2637
|
num_inference_steps: Optional[int] = None,
|
|
2641
2638
|
seed: Optional[int] = None,
|
|
2642
|
-
extra_body: Optional[
|
|
2639
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2643
2640
|
) -> bytes:
|
|
2644
2641
|
"""
|
|
2645
2642
|
Generate a video based on a given text.
|
|
@@ -2658,7 +2655,7 @@ class AsyncInferenceClient:
|
|
|
2658
2655
|
guidance_scale (`float`, *optional*):
|
|
2659
2656
|
A higher guidance scale value encourages the model to generate videos closely linked to the text
|
|
2660
2657
|
prompt, but values too high may cause saturation and other artifacts.
|
|
2661
|
-
negative_prompt (`
|
|
2658
|
+
negative_prompt (`list[str]`, *optional*):
|
|
2662
2659
|
One or several prompt to guide what NOT to include in video generation.
|
|
2663
2660
|
num_frames (`float`, *optional*):
|
|
2664
2661
|
The num_frames parameter determines how many video frames are generated.
|
|
@@ -2667,7 +2664,7 @@ class AsyncInferenceClient:
|
|
|
2667
2664
|
expense of slower inference.
|
|
2668
2665
|
seed (`int`, *optional*):
|
|
2669
2666
|
Seed for the random number generator.
|
|
2670
|
-
extra_body (`
|
|
2667
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2671
2668
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2672
2669
|
for supported parameters.
|
|
2673
2670
|
|
|
@@ -2747,7 +2744,7 @@ class AsyncInferenceClient:
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
-        extra_body: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.

@@ -2809,7 +2806,7 @@ class AsyncInferenceClient:
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-            extra_body (`Dict[str, Any]`, *optional*):
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:

@@ -2818,7 +2815,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
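For reference, a minimal sketch of `text_to_speech` with the error handling described above; the model ID is a placeholder and the output container format depends on the model:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient
from huggingface_hub.errors import HfHubHTTPError


async def main() -> None:
    client = AsyncInferenceClient()
    try:
        audio = await client.text_to_speech(
            "Hello world, this is a test.",
            model="some-org/some-tts-model",  # placeholder model ID
        )
    except HfHubHTTPError as err:
        # Non-503 HTTP errors are raised as HfHubHTTPError, per the Raises section above.
        print(f"Text-to-speech request failed: {err}")
        return
    with open("speech.flac", "wb") as f:  # extension is a guess; the actual format is model-dependent
        f.write(audio)


asyncio.run(main())
```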
@@ -2942,9 +2939,9 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
-        ignore_labels: Optional[List[str]] = None,
+        ignore_labels: Optional[list[str]] = None,
         stride: Optional[int] = None,
-    ) -> List[TokenClassificationOutputElement]:
+    ) -> list[TokenClassificationOutputElement]:
         """
         Perform token classification on the given text.
         Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.

@@ -2958,18 +2955,18 @@ class AsyncInferenceClient:
                 Defaults to None.
             aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
                 The strategy used to fuse tokens based on model predictions
-            ignore_labels (`List[str]`, *optional*):
+            ignore_labels (`list[str`, *optional*):
                 A list of labels to ignore
             stride (`int`, *optional*):
                 The number of overlapping tokens between chunks when splitting the input text.

         Returns:
-            `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+            `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
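A minimal sketch of `token_classification` with the parameters documented above; the model ID is a placeholder:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    entities = await client.token_classification(
        "My name is Sarah and I live in London.",
        model="dslim/bert-base-NER",  # placeholder model ID
        aggregation_strategy="simple",
        ignore_labels=["O"],  # now typed as list[str]
    )
    # Each item carries the entity group, confidence score, word, and start/end indices.
    for entity in entities:
        print(entity.entity_group, entity.word, entity.score)


asyncio.run(main())
```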
@@ -3021,7 +3018,7 @@ class AsyncInferenceClient:
         tgt_lang: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         truncation: Optional["TranslationTruncationStrategy"] = None,
-        generate_parameters: Optional[Dict[str, Any]] = None,
+        generate_parameters: Optional[dict[str, Any]] = None,
     ) -> TranslationOutput:
         """
         Convert text from one language to another.

@@ -3046,7 +3043,7 @@ class AsyncInferenceClient:
                 Whether to clean up the potential extra spaces in the text output.
             truncation (`"TranslationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
-            generate_parameters (`Dict[str, Any]`, *optional*):
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.

         Returns:

@@ -3055,7 +3052,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If only one of the `src_lang` and `tgt_lang` arguments are provided.
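A minimal sketch of the `translation` call; note that `src_lang` and `tgt_lang` must be passed together, otherwise a `ValueError` is raised. The model ID and language codes are placeholders:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    result = await client.translation(
        "My name is Wolfgang and I live in Berlin.",
        model="facebook/nllb-200-distilled-600M",  # placeholder model ID
        src_lang="eng_Latn",
        tgt_lang="fra_Latn",
    )
    print(result.translation_text)


asyncio.run(main())
```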
@@ -3109,7 +3106,7 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         top_k: Optional[int] = None,
-    ) -> List[VisualQuestionAnsweringOutputElement]:
+    ) -> list[VisualQuestionAnsweringOutputElement]:
         """
         Answering open-ended questions based on an image.

@@ -3126,12 +3123,12 @@ class AsyncInferenceClient:
                 The number of answers to return (will be chosen by order of likelihood). Note that we return less than
                 topk answers if there are not enough options available within the context.
         Returns:
-            `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+            `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
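A minimal sketch of `visual_question_answering` with the updated `list[...]` return type; the image URL and model ID are placeholders:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    # The image may be raw bytes, a file path, a URL, or a PIL Image.
    answers = await client.visual_question_answering(
        image="https://example.com/cat.jpg",  # placeholder URL
        question="What animal is in the picture?",
        model="dandelin/vilt-b32-finetuned-vqa",  # placeholder model ID
        top_k=3,
    )
    for answer in answers:
        print(answer)


asyncio.run(main())
```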
@@ -3165,21 +3162,21 @@ class AsyncInferenceClient:
     async def zero_shot_classification(
         self,
         text: str,
-        candidate_labels: List[str],
+        candidate_labels: list[str],
         *,
         multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
-    ) -> List[ZeroShotClassificationOutputElement]:
+    ) -> list[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.

         Args:
             text (`str`):
                 The input text to classify.
-            candidate_labels (`List[str]`):
+            candidate_labels (`list[str]`):
                 The set of possible class labels to classify the text into.
-            labels (`List[str]`, *optional*):
+            labels (`list[str]`, *optional*):
                 (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
             multi_label (`bool`, *optional*):
                 Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of

@@ -3194,12 +3191,12 @@ class AsyncInferenceClient:


         Returns:
-            `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example with `multi_label=False`:
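A minimal sketch of `zero_shot_classification` using the `list[str]`-typed `candidate_labels`; the model ID is a placeholder:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.zero_shot_classification(
        "I really enjoyed this movie, the acting was superb.",
        candidate_labels=["positive", "negative", "neutral"],
        multi_label=False,  # scores are normalized across labels when False
        model="facebook/bart-large-mnli",  # placeholder model ID
    )
    for item in results:
        print(item.label, item.score)


asyncio.run(main())
```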
@@ -3273,22 +3270,22 @@ class AsyncInferenceClient:
     async def zero_shot_image_classification(
         self,
         image: ContentT,
-        candidate_labels: List[str],
+        candidate_labels: list[str],
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
         # deprecated argument
-        labels: List[str] = None,  # type: ignore
-    ) -> List[ZeroShotImageClassificationOutputElement]:
+        labels: list[str] = None,  # type: ignore
+    ) -> list[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.

         Args:
             image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
                 The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
-            candidate_labels (`List[str]`):
+            candidate_labels (`list[str]`):
                 The candidate labels for this image
-            labels (`List[str]`, *optional*):
+            labels (`list[str]`, *optional*):
                 (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed

@@ -3298,12 +3295,12 @@ class AsyncInferenceClient:
                 replacing the placeholder with the candidate labels.

         Returns:
-            `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
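Likewise, a minimal sketch of `zero_shot_image_classification`; the image URL and model ID are placeholders:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.zero_shot_image_classification(
        image="https://example.com/dog.jpg",  # placeholder URL
        candidate_labels=["dog", "cat", "bird"],  # now typed as list[str]
        model="openai/clip-vit-base-patch32",  # placeholder model ID
    )
    for item in results:
        print(item.label, item.score)


asyncio.run(main())
```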
@@ -3338,144 +3335,7 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    async def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
-        in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-            ```py
-            # Must be run in an async context
-            >>> from huggingface_hub import AsyncInferenceClient
-            >>> client = AsyncInferenceClient()
-
-            # Discover zero-shot-classification models currently deployed
-            >>> models = await client.list_deployed_models()
-            >>> models["zero-shot-classification"]
-            ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-            # List from only 1 framework
-            >>> await client.list_deployed_models("text-generation-inference")
-            {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-            ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Model running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
-    def _get_client_session(self, headers: Optional[Dict] = None) -> "ClientSession":
-        aiohttp = _import_aiohttp()
-        client_headers = self.headers.copy()
-        if headers is not None:
-            client_headers.update(headers)
-
-        # Return a new aiohttp ClientSession with correct settings.
-        session = aiohttp.ClientSession(
-            headers=client_headers,
-            cookies=self.cookies,
-            timeout=aiohttp.ClientTimeout(self.timeout),
-            trust_env=self.trust_env,
-        )
-
-        # Keep track of sessions to close them later
-        self._sessions[session] = set()
-
-        # Override the `._request` method to register responses to be closed
-        session._wrapped_request = session._request
-
-        async def _request(method, url, **kwargs):
-            response = await session._wrapped_request(method, url, **kwargs)
-            self._sessions[session].add(response)
-            return response
-
-        session._request = _request
-
-        # Override the 'close' method to
-        # 1. close ongoing responses
-        # 2. deregister the session when closed
-        session._close = session.close
-
-        async def close_session():
-            for response in self._sessions[session]:
-                response.close()
-            await session._close()
-            self._sessions.pop(session, None)
-
-        session.close = close_session
-        return session
-
-    async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+    async def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
         """
         Get information about the deployed endpoint.

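The removed `list_deployed_models` helper points, in its own deprecation message, to `HfApi.list_models(..., inference_provider='...')` as the replacement for discovering warm models. A minimal sketch of that replacement, with placeholder provider name, task, and limit:

```python
from huggingface_hub import HfApi

api = HfApi()
# List warm models served by a given inference provider, as suggested by the
# deprecation message of the removed helper.
for model in api.list_models(
    inference_provider="hf-inference",  # placeholder provider name
    task="zero-shot-classification",
    limit=10,
):
    print(model.id)
```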
@@ -3488,7 +3348,7 @@ class AsyncInferenceClient:
             Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

         Returns:
-            `Dict[str, Any]`: Information about the endpoint.
+            `dict[str, Any]`: Information about the endpoint.

         Example:
            ```py
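A minimal sketch of `get_endpoint_info`, assuming a dedicated Inference Endpoint URL (the URL is a placeholder); the call now returns a plain `dict[str, Any]`:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    # The endpoint URL is a placeholder; a TGI/TEI-powered deployment exposing /info is assumed.
    client = AsyncInferenceClient(model="https://my-endpoint.endpoints.huggingface.cloud")
    info = await client.get_endpoint_info()
    print(info)


asyncio.run(main())
```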
@@ -3530,17 +3390,16 @@ class AsyncInferenceClient:
         else:
             url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

-
-
-
-
+        client = await self._get_async_client()
+        response = await client.get(url, headers=build_hf_headers(token=self.token))
+        hf_raise_for_status(response)
+        return response.json()

     async def health_check(self, model: Optional[str] = None) -> bool:
         """
         Check the health of the deployed endpoint.

         Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

         Args:
             model (`str`, *optional*):
@@ -3565,77 +3424,12 @@ class AsyncInferenceClient:
         if model is None:
             raise ValueError("Model id not provided.")
         if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
         url = model.rstrip("/") + "/health"

-
-
-
-
-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    async def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-        ```py
-        # Must be run in an async context
-        >>> from huggingface_hub import AsyncInferenceClient
-        >>> client = AsyncInferenceClient()
-        >>> await client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
-            response = await client.get(url, proxy=self.proxies)
-            response.raise_for_status()
-            response_data = await response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
+        client = await self._get_async_client()
+        response = await client.get(url, headers=build_hf_headers(token=self.token))
+        return response.status_code == 200

     @property
     def chat(self) -> "ProxyClientChat":
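Finally, a minimal sketch of the updated `health_check`, which now only accepts an Inference Endpoint URL (the URL below is a placeholder); per the removed deprecation message, `HfApi.model_info` is the suggested way to check model status on the serverless API and external providers:

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    # Passing anything other than an http(s) URL now raises
    # ValueError("Model must be an Inference Endpoint URL.").
    is_healthy = await client.health_check("https://my-endpoint.endpoints.huggingface.cloud")
    print("healthy" if is_healthy else "unhealthy")


asyncio.run(main())
```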