huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +33 -45
- huggingface_hub/_commit_api.py +39 -43
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +17 -43
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +135 -50
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +18 -32
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +143 -39
- huggingface_hub/cli/auth.py +105 -171
- huggingface_hub/cli/cache.py +594 -361
- huggingface_hub/cli/download.py +120 -112
- huggingface_hub/cli/hf.py +38 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +282 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -220
- huggingface_hub/cli/upload_large_folder.py +91 -106
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +17 -52
- huggingface_hub/dataclasses.py +135 -21
- huggingface_hub/errors.py +47 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +351 -303
- huggingface_hub/hf_api.py +398 -570
- huggingface_hub/hf_file_system.py +101 -66
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +177 -162
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +218 -258
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +12 -4
- huggingface_hub/inference/_providers/_common.py +62 -24
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +21 -94
- huggingface_hub/repocard.py +15 -16
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +11 -6
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +49 -74
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +371 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +59 -23
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
- huggingface_hub-1.0.0.dist-info/RECORD +152 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -204
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -161
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -208
- huggingface_hub/commands/version.py +0 -40
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -497
- huggingface_hub/repository.py +0 -1471
- huggingface_hub/serialization/_tensorflow.py +0 -92
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -21,12 +21,16 @@
|
|
|
21
21
|
import asyncio
|
|
22
22
|
import base64
|
|
23
23
|
import logging
|
|
24
|
+
import os
|
|
24
25
|
import re
|
|
25
26
|
import warnings
|
|
26
|
-
from
|
|
27
|
+
from contextlib import AsyncExitStack
|
|
28
|
+
from typing import TYPE_CHECKING, Any, AsyncIterable, Literal, Optional, Union, overload
|
|
29
|
+
|
|
30
|
+
import httpx
|
|
27
31
|
|
|
28
32
|
from huggingface_hub import constants
|
|
29
|
-
from huggingface_hub.errors import InferenceTimeoutError
|
|
33
|
+
from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
|
|
30
34
|
from huggingface_hub.inference._common import (
|
|
31
35
|
TASKS_EXPECTING_IMAGES,
|
|
32
36
|
ContentT,
|
|
@@ -86,15 +90,19 @@ from huggingface_hub.inference._generated.types import (
|
|
|
86
90
|
ZeroShotImageClassificationOutputElement,
|
|
87
91
|
)
|
|
88
92
|
from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
|
|
89
|
-
from huggingface_hub.utils import
|
|
93
|
+
from huggingface_hub.utils import (
|
|
94
|
+
build_hf_headers,
|
|
95
|
+
get_async_session,
|
|
96
|
+
hf_raise_for_status,
|
|
97
|
+
validate_hf_hub_args,
|
|
98
|
+
)
|
|
90
99
|
from huggingface_hub.utils._auth import get_token
|
|
91
100
|
|
|
92
|
-
from .._common import _async_yield_from
|
|
101
|
+
from .._common import _async_yield_from
|
|
93
102
|
|
|
94
103
|
|
|
95
104
|
if TYPE_CHECKING:
|
|
96
105
|
import numpy as np
|
|
97
|
-
from aiohttp import ClientResponse, ClientSession
|
|
98
106
|
from PIL.Image import Image
|
|
99
107
|
|
|
100
108
|
logger = logging.getLogger(__name__)
|
|
@@ -127,18 +135,14 @@ class AsyncInferenceClient:
|
|
|
127
135
|
arguments are mutually exclusive and have the exact same behavior.
|
|
128
136
|
timeout (`float`, `optional`):
|
|
129
137
|
The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
|
|
130
|
-
headers (`
|
|
138
|
+
headers (`dict[str, str]`, `optional`):
|
|
131
139
|
Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
|
|
132
140
|
Values in this dictionary will override the default values.
|
|
133
141
|
bill_to (`str`, `optional`):
|
|
134
142
|
The billing account to use for the requests. By default the requests are billed on the user's account.
|
|
135
143
|
Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
|
|
136
|
-
cookies (`
|
|
144
|
+
cookies (`dict[str, str]`, `optional`):
|
|
137
145
|
Additional cookies to send to the server.
|
|
138
|
-
trust_env ('bool', 'optional'):
|
|
139
|
-
Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
|
|
140
|
-
proxies (`Any`, `optional`):
|
|
141
|
-
Proxies to use for the request.
|
|
142
146
|
base_url (`str`, `optional`):
|
|
143
147
|
Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
|
|
144
148
|
follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
|
|
@@ -147,6 +151,7 @@ class AsyncInferenceClient:
|
|
|
147
151
|
follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
|
|
148
152
|
"""
|
|
149
153
|
|
|
154
|
+
@validate_hf_hub_args
|
|
150
155
|
def __init__(
|
|
151
156
|
self,
|
|
152
157
|
model: Optional[str] = None,
|
|
@@ -154,10 +159,8 @@ class AsyncInferenceClient:
|
|
|
154
159
|
provider: Optional[PROVIDER_OR_POLICY_T] = None,
|
|
155
160
|
token: Optional[str] = None,
|
|
156
161
|
timeout: Optional[float] = None,
|
|
157
|
-
headers: Optional[
|
|
158
|
-
cookies: Optional[
|
|
159
|
-
trust_env: bool = False,
|
|
160
|
-
proxies: Optional[Any] = None,
|
|
162
|
+
headers: Optional[dict[str, str]] = None,
|
|
163
|
+
cookies: Optional[dict[str, str]] = None,
|
|
161
164
|
bill_to: Optional[str] = None,
|
|
162
165
|
# OpenAI compatibility
|
|
163
166
|
base_url: Optional[str] = None,
|
|
@@ -219,15 +222,36 @@ class AsyncInferenceClient:
|
|
|
219
222
|
|
|
220
223
|
self.cookies = cookies
|
|
221
224
|
self.timeout = timeout
|
|
222
|
-
self.trust_env = trust_env
|
|
223
|
-
self.proxies = proxies
|
|
224
225
|
|
|
225
|
-
|
|
226
|
-
self.
|
|
226
|
+
self.exit_stack = AsyncExitStack()
|
|
227
|
+
self._async_client: Optional[httpx.AsyncClient] = None
|
|
227
228
|
|
|
228
229
|
def __repr__(self):
|
|
229
230
|
return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
|
|
230
231
|
|
|
232
|
+
async def __aenter__(self):
|
|
233
|
+
return self
|
|
234
|
+
|
|
235
|
+
async def __aexit__(self, exc_type, exc_value, traceback):
|
|
236
|
+
await self.close()
|
|
237
|
+
|
|
238
|
+
async def close(self):
|
|
239
|
+
"""Close the client.
|
|
240
|
+
|
|
241
|
+
This method is automatically called when using the client as a context manager.
|
|
242
|
+
"""
|
|
243
|
+
await self.exit_stack.aclose()
|
|
244
|
+
|
|
245
|
+
async def _get_async_client(self):
|
|
246
|
+
"""Get a unique async client for this AsyncInferenceClient instance.
|
|
247
|
+
|
|
248
|
+
Returns the same client instance on subsequent calls, ensuring proper
|
|
249
|
+
connection reuse and resource management through the exit stack.
|
|
250
|
+
"""
|
|
251
|
+
if self._async_client is None:
|
|
252
|
+
self._async_client = await self.exit_stack.enter_async_context(get_async_session())
|
|
253
|
+
return self._async_client
|
|
254
|
+
|
|
231
255
|
@overload
|
|
232
256
|
async def _inner_post( # type: ignore[misc]
|
|
233
257
|
self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
|
|
@@ -236,83 +260,60 @@ class AsyncInferenceClient:
|
|
|
236
260
|
@overload
|
|
237
261
|
async def _inner_post( # type: ignore[misc]
|
|
238
262
|
self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
|
|
239
|
-
) -> AsyncIterable[
|
|
263
|
+
) -> AsyncIterable[str]: ...
|
|
240
264
|
|
|
241
265
|
@overload
|
|
242
266
|
async def _inner_post(
|
|
243
267
|
self, request_parameters: RequestParameters, *, stream: bool = False
|
|
244
|
-
) -> Union[bytes, AsyncIterable[
|
|
268
|
+
) -> Union[bytes, AsyncIterable[str]]: ...
|
|
245
269
|
|
|
246
270
|
async def _inner_post(
|
|
247
271
|
self, request_parameters: RequestParameters, *, stream: bool = False
|
|
248
|
-
) -> Union[bytes, AsyncIterable[
|
|
272
|
+
) -> Union[bytes, AsyncIterable[str]]:
|
|
249
273
|
"""Make a request to the inference server."""
|
|
250
274
|
|
|
251
|
-
aiohttp = _import_aiohttp()
|
|
252
|
-
|
|
253
275
|
# TODO: this should be handled in provider helpers directly
|
|
254
276
|
if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
|
|
255
277
|
request_parameters.headers["Accept"] = "image/png"
|
|
256
278
|
|
|
257
|
-
# Do not use context manager as we don't want to close the connection immediately when returning
|
|
258
|
-
# a stream
|
|
259
|
-
session = self._get_client_session(headers=request_parameters.headers)
|
|
260
|
-
|
|
261
279
|
try:
|
|
262
|
-
|
|
263
|
-
request_parameters.url, json=request_parameters.json, data=request_parameters.data, proxy=self.proxies
|
|
264
|
-
)
|
|
265
|
-
response_error_payload = None
|
|
266
|
-
if response.status != 200:
|
|
267
|
-
try:
|
|
268
|
-
response_error_payload = await response.json() # get payload before connection closed
|
|
269
|
-
except Exception:
|
|
270
|
-
pass
|
|
271
|
-
response.raise_for_status()
|
|
280
|
+
client = await self._get_async_client()
|
|
272
281
|
if stream:
|
|
273
|
-
|
|
282
|
+
response = await self.exit_stack.enter_async_context(
|
|
283
|
+
client.stream(
|
|
284
|
+
"POST",
|
|
285
|
+
request_parameters.url,
|
|
286
|
+
json=request_parameters.json,
|
|
287
|
+
data=request_parameters.data,
|
|
288
|
+
headers=request_parameters.headers,
|
|
289
|
+
cookies=self.cookies,
|
|
290
|
+
timeout=self.timeout,
|
|
291
|
+
)
|
|
292
|
+
)
|
|
293
|
+
hf_raise_for_status(response)
|
|
294
|
+
return _async_yield_from(client, response)
|
|
274
295
|
else:
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
296
|
+
response = await client.post(
|
|
297
|
+
request_parameters.url,
|
|
298
|
+
json=request_parameters.json,
|
|
299
|
+
data=request_parameters.data,
|
|
300
|
+
headers=request_parameters.headers,
|
|
301
|
+
cookies=self.cookies,
|
|
302
|
+
timeout=self.timeout,
|
|
303
|
+
)
|
|
304
|
+
hf_raise_for_status(response)
|
|
305
|
+
return response.content
|
|
278
306
|
except asyncio.TimeoutError as error:
|
|
279
|
-
await session.close()
|
|
280
307
|
# Convert any `TimeoutError` to a `InferenceTimeoutError`
|
|
281
308
|
raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
|
|
282
|
-
except
|
|
283
|
-
error.
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
309
|
+
except HfHubHTTPError as error:
|
|
310
|
+
if error.response.status_code == 422 and request_parameters.task != "unknown":
|
|
311
|
+
msg = str(error.args[0])
|
|
312
|
+
if len(error.response.text) > 0:
|
|
313
|
+
msg += f"{os.linesep}{error.response.text}{os.linesep}"
|
|
314
|
+
error.args = (msg,) + error.args[1:]
|
|
288
315
|
raise
|
|
289
316
|
|
|
290
|
-
async def __aenter__(self):
|
|
291
|
-
return self
|
|
292
|
-
|
|
293
|
-
async def __aexit__(self, exc_type, exc_value, traceback):
|
|
294
|
-
await self.close()
|
|
295
|
-
|
|
296
|
-
def __del__(self):
|
|
297
|
-
if len(self._sessions) > 0:
|
|
298
|
-
warnings.warn(
|
|
299
|
-
"Deleting 'AsyncInferenceClient' client but some sessions are still open. "
|
|
300
|
-
"This can happen if you've stopped streaming data from the server before the stream was complete. "
|
|
301
|
-
"To close the client properly, you must call `await client.close()` "
|
|
302
|
-
"or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
|
|
303
|
-
)
|
|
304
|
-
|
|
305
|
-
async def close(self):
|
|
306
|
-
"""Close all open sessions.
|
|
307
|
-
|
|
308
|
-
By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
|
|
309
|
-
are streaming data from the server and you stop before the stream is complete, you must call this method to
|
|
310
|
-
close the session properly.
|
|
311
|
-
|
|
312
|
-
Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
|
|
313
|
-
"""
|
|
314
|
-
await asyncio.gather(*[session.close() for session in self._sessions.keys()])
|
|
315
|
-
|
|
316
317
|
async def audio_classification(
|
|
317
318
|
self,
|
|
318
319
|
audio: ContentT,
|
|
@@ -320,7 +321,7 @@ class AsyncInferenceClient:
|
|
|
320
321
|
model: Optional[str] = None,
|
|
321
322
|
top_k: Optional[int] = None,
|
|
322
323
|
function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
|
|
323
|
-
) ->
|
|
324
|
+
) -> list[AudioClassificationOutputElement]:
|
|
324
325
|
"""
|
|
325
326
|
Perform audio classification on the provided audio content.
|
|
326
327
|
|
|
@@ -338,12 +339,12 @@ class AsyncInferenceClient:
|
|
|
338
339
|
The function to apply to the model outputs in order to retrieve the scores.
|
|
339
340
|
|
|
340
341
|
Returns:
|
|
341
|
-
`
|
|
342
|
+
`list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
|
|
342
343
|
|
|
343
344
|
Raises:
|
|
344
345
|
[`InferenceTimeoutError`]:
|
|
345
346
|
If the model is unavailable or the request times out.
|
|
346
|
-
`
|
|
347
|
+
[`HfHubHTTPError`]:
|
|
347
348
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
348
349
|
|
|
349
350
|
Example:
|
|
@@ -376,7 +377,7 @@ class AsyncInferenceClient:
|
|
|
376
377
|
audio: ContentT,
|
|
377
378
|
*,
|
|
378
379
|
model: Optional[str] = None,
|
|
379
|
-
) ->
|
|
380
|
+
) -> list[AudioToAudioOutputElement]:
|
|
380
381
|
"""
|
|
381
382
|
Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
|
|
382
383
|
|
|
@@ -390,12 +391,12 @@ class AsyncInferenceClient:
|
|
|
390
391
|
audio_to_audio will be used.
|
|
391
392
|
|
|
392
393
|
Returns:
|
|
393
|
-
`
|
|
394
|
+
`list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
|
|
394
395
|
|
|
395
396
|
Raises:
|
|
396
397
|
[`InferenceTimeoutError`]:
|
|
397
398
|
If the model is unavailable or the request times out.
|
|
398
|
-
`
|
|
399
|
+
[`HfHubHTTPError`]:
|
|
399
400
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
400
401
|
|
|
401
402
|
Example:
|
|
@@ -429,7 +430,7 @@ class AsyncInferenceClient:
|
|
|
429
430
|
audio: ContentT,
|
|
430
431
|
*,
|
|
431
432
|
model: Optional[str] = None,
|
|
432
|
-
extra_body: Optional[
|
|
433
|
+
extra_body: Optional[dict] = None,
|
|
433
434
|
) -> AutomaticSpeechRecognitionOutput:
|
|
434
435
|
"""
|
|
435
436
|
Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
|
|
@@ -440,7 +441,7 @@ class AsyncInferenceClient:
|
|
|
440
441
|
model (`str`, *optional*):
|
|
441
442
|
The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
|
|
442
443
|
Inference Endpoint. If not provided, the default recommended model for ASR will be used.
|
|
443
|
-
extra_body (`
|
|
444
|
+
extra_body (`dict`, *optional*):
|
|
444
445
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
445
446
|
for supported parameters.
|
|
446
447
|
Returns:
|
|
@@ -449,7 +450,7 @@ class AsyncInferenceClient:
|
|
|
449
450
|
Raises:
|
|
450
451
|
[`InferenceTimeoutError`]:
|
|
451
452
|
If the model is unavailable or the request times out.
|
|
452
|
-
`
|
|
453
|
+
[`HfHubHTTPError`]:
|
|
453
454
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
454
455
|
|
|
455
456
|
Example:
|
|
@@ -476,105 +477,105 @@ class AsyncInferenceClient:
|
|
|
476
477
|
@overload
|
|
477
478
|
async def chat_completion( # type: ignore
|
|
478
479
|
self,
|
|
479
|
-
messages:
|
|
480
|
+
messages: list[Union[dict, ChatCompletionInputMessage]],
|
|
480
481
|
*,
|
|
481
482
|
model: Optional[str] = None,
|
|
482
483
|
stream: Literal[False] = False,
|
|
483
484
|
frequency_penalty: Optional[float] = None,
|
|
484
|
-
logit_bias: Optional[
|
|
485
|
+
logit_bias: Optional[list[float]] = None,
|
|
485
486
|
logprobs: Optional[bool] = None,
|
|
486
487
|
max_tokens: Optional[int] = None,
|
|
487
488
|
n: Optional[int] = None,
|
|
488
489
|
presence_penalty: Optional[float] = None,
|
|
489
490
|
response_format: Optional[ChatCompletionInputGrammarType] = None,
|
|
490
491
|
seed: Optional[int] = None,
|
|
491
|
-
stop: Optional[
|
|
492
|
+
stop: Optional[list[str]] = None,
|
|
492
493
|
stream_options: Optional[ChatCompletionInputStreamOptions] = None,
|
|
493
494
|
temperature: Optional[float] = None,
|
|
494
495
|
tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
|
|
495
496
|
tool_prompt: Optional[str] = None,
|
|
496
|
-
tools: Optional[
|
|
497
|
+
tools: Optional[list[ChatCompletionInputTool]] = None,
|
|
497
498
|
top_logprobs: Optional[int] = None,
|
|
498
499
|
top_p: Optional[float] = None,
|
|
499
|
-
extra_body: Optional[
|
|
500
|
+
extra_body: Optional[dict] = None,
|
|
500
501
|
) -> ChatCompletionOutput: ...
|
|
501
502
|
|
|
502
503
|
@overload
|
|
503
504
|
async def chat_completion( # type: ignore
|
|
504
505
|
self,
|
|
505
|
-
messages:
|
|
506
|
+
messages: list[Union[dict, ChatCompletionInputMessage]],
|
|
506
507
|
*,
|
|
507
508
|
model: Optional[str] = None,
|
|
508
509
|
stream: Literal[True] = True,
|
|
509
510
|
frequency_penalty: Optional[float] = None,
|
|
510
|
-
logit_bias: Optional[
|
|
511
|
+
logit_bias: Optional[list[float]] = None,
|
|
511
512
|
logprobs: Optional[bool] = None,
|
|
512
513
|
max_tokens: Optional[int] = None,
|
|
513
514
|
n: Optional[int] = None,
|
|
514
515
|
presence_penalty: Optional[float] = None,
|
|
515
516
|
response_format: Optional[ChatCompletionInputGrammarType] = None,
|
|
516
517
|
seed: Optional[int] = None,
|
|
517
|
-
stop: Optional[
|
|
518
|
+
stop: Optional[list[str]] = None,
|
|
518
519
|
stream_options: Optional[ChatCompletionInputStreamOptions] = None,
|
|
519
520
|
temperature: Optional[float] = None,
|
|
520
521
|
tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
|
|
521
522
|
tool_prompt: Optional[str] = None,
|
|
522
|
-
tools: Optional[
|
|
523
|
+
tools: Optional[list[ChatCompletionInputTool]] = None,
|
|
523
524
|
top_logprobs: Optional[int] = None,
|
|
524
525
|
top_p: Optional[float] = None,
|
|
525
|
-
extra_body: Optional[
|
|
526
|
+
extra_body: Optional[dict] = None,
|
|
526
527
|
) -> AsyncIterable[ChatCompletionStreamOutput]: ...
|
|
527
528
|
|
|
528
529
|
@overload
|
|
529
530
|
async def chat_completion(
|
|
530
531
|
self,
|
|
531
|
-
messages:
|
|
532
|
+
messages: list[Union[dict, ChatCompletionInputMessage]],
|
|
532
533
|
*,
|
|
533
534
|
model: Optional[str] = None,
|
|
534
535
|
stream: bool = False,
|
|
535
536
|
frequency_penalty: Optional[float] = None,
|
|
536
|
-
logit_bias: Optional[
|
|
537
|
+
logit_bias: Optional[list[float]] = None,
|
|
537
538
|
logprobs: Optional[bool] = None,
|
|
538
539
|
max_tokens: Optional[int] = None,
|
|
539
540
|
n: Optional[int] = None,
|
|
540
541
|
presence_penalty: Optional[float] = None,
|
|
541
542
|
response_format: Optional[ChatCompletionInputGrammarType] = None,
|
|
542
543
|
seed: Optional[int] = None,
|
|
543
|
-
stop: Optional[
|
|
544
|
+
stop: Optional[list[str]] = None,
|
|
544
545
|
stream_options: Optional[ChatCompletionInputStreamOptions] = None,
|
|
545
546
|
temperature: Optional[float] = None,
|
|
546
547
|
tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
|
|
547
548
|
tool_prompt: Optional[str] = None,
|
|
548
|
-
tools: Optional[
|
|
549
|
+
tools: Optional[list[ChatCompletionInputTool]] = None,
|
|
549
550
|
top_logprobs: Optional[int] = None,
|
|
550
551
|
top_p: Optional[float] = None,
|
|
551
|
-
extra_body: Optional[
|
|
552
|
+
extra_body: Optional[dict] = None,
|
|
552
553
|
) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
|
|
553
554
|
|
|
554
555
|
async def chat_completion(
|
|
555
556
|
self,
|
|
556
|
-
messages:
|
|
557
|
+
messages: list[Union[dict, ChatCompletionInputMessage]],
|
|
557
558
|
*,
|
|
558
559
|
model: Optional[str] = None,
|
|
559
560
|
stream: bool = False,
|
|
560
561
|
# Parameters from ChatCompletionInput (handled manually)
|
|
561
562
|
frequency_penalty: Optional[float] = None,
|
|
562
|
-
logit_bias: Optional[
|
|
563
|
+
logit_bias: Optional[list[float]] = None,
|
|
563
564
|
logprobs: Optional[bool] = None,
|
|
564
565
|
max_tokens: Optional[int] = None,
|
|
565
566
|
n: Optional[int] = None,
|
|
566
567
|
presence_penalty: Optional[float] = None,
|
|
567
568
|
response_format: Optional[ChatCompletionInputGrammarType] = None,
|
|
568
569
|
seed: Optional[int] = None,
|
|
569
|
-
stop: Optional[
|
|
570
|
+
stop: Optional[list[str]] = None,
|
|
570
571
|
stream_options: Optional[ChatCompletionInputStreamOptions] = None,
|
|
571
572
|
temperature: Optional[float] = None,
|
|
572
573
|
tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
|
|
573
574
|
tool_prompt: Optional[str] = None,
|
|
574
|
-
tools: Optional[
|
|
575
|
+
tools: Optional[list[ChatCompletionInputTool]] = None,
|
|
575
576
|
top_logprobs: Optional[int] = None,
|
|
576
577
|
top_p: Optional[float] = None,
|
|
577
|
-
extra_body: Optional[
|
|
578
|
+
extra_body: Optional[dict] = None,
|
|
578
579
|
) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
|
|
579
580
|
"""
|
|
580
581
|
A method for completing conversations using a specified language model.
|
|
@@ -600,7 +601,7 @@ class AsyncInferenceClient:
|
|
|
600
601
|
frequency_penalty (`float`, *optional*):
|
|
601
602
|
Penalizes new tokens based on their existing frequency
|
|
602
603
|
in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
|
|
603
|
-
logit_bias (`
|
|
604
|
+
logit_bias (`list[float]`, *optional*):
|
|
604
605
|
Adjusts the likelihood of specific tokens appearing in the generated output.
|
|
605
606
|
logprobs (`bool`, *optional*):
|
|
606
607
|
Whether to return log probabilities of the output tokens or not. If true, returns the log
|
|
@@ -616,7 +617,7 @@ class AsyncInferenceClient:
|
|
|
616
617
|
Grammar constraints. Can be either a JSONSchema or a regex.
|
|
617
618
|
seed (Optional[`int`], *optional*):
|
|
618
619
|
Seed for reproducible control flow. Defaults to None.
|
|
619
|
-
stop (`
|
|
620
|
+
stop (`list[str]`, *optional*):
|
|
620
621
|
Up to four strings which trigger the end of the response.
|
|
621
622
|
Defaults to None.
|
|
622
623
|
stream (`bool`, *optional*):
|
|
@@ -640,7 +641,7 @@ class AsyncInferenceClient:
|
|
|
640
641
|
tools (List of [`ChatCompletionInputTool`], *optional*):
|
|
641
642
|
A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
|
|
642
643
|
provide a list of functions the model may generate JSON inputs for.
|
|
643
|
-
extra_body (`
|
|
644
|
+
extra_body (`dict`, *optional*):
|
|
644
645
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
645
646
|
for supported parameters.
|
|
646
647
|
Returns:
|
|
@@ -652,7 +653,7 @@ class AsyncInferenceClient:
|
|
|
652
653
|
Raises:
|
|
653
654
|
[`InferenceTimeoutError`]:
|
|
654
655
|
If the model is unavailable or the request times out.
|
|
655
|
-
`
|
|
656
|
+
[`HfHubHTTPError`]:
|
|
656
657
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
657
658
|
|
|
658
659
|
Example:
|
|
@@ -972,8 +973,8 @@ class AsyncInferenceClient:
|
|
|
972
973
|
max_question_len: Optional[int] = None,
|
|
973
974
|
max_seq_len: Optional[int] = None,
|
|
974
975
|
top_k: Optional[int] = None,
|
|
975
|
-
word_boxes: Optional[
|
|
976
|
-
) ->
|
|
976
|
+
word_boxes: Optional[list[Union[list[float], str]]] = None,
|
|
977
|
+
) -> list[DocumentQuestionAnsweringOutputElement]:
|
|
977
978
|
"""
|
|
978
979
|
Answer questions on document images.
|
|
979
980
|
|
|
@@ -1003,16 +1004,16 @@ class AsyncInferenceClient:
|
|
|
1003
1004
|
top_k (`int`, *optional*):
|
|
1004
1005
|
The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
|
|
1005
1006
|
answers if there are not enough options available within the context.
|
|
1006
|
-
word_boxes (`
|
|
1007
|
+
word_boxes (`list[Union[list[float], str]]`, *optional*):
|
|
1007
1008
|
A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
|
|
1008
1009
|
step and use the provided bounding boxes instead.
|
|
1009
1010
|
Returns:
|
|
1010
|
-
`
|
|
1011
|
+
`list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
|
|
1011
1012
|
|
|
1012
1013
|
Raises:
|
|
1013
1014
|
[`InferenceTimeoutError`]:
|
|
1014
1015
|
If the model is unavailable or the request times out.
|
|
1015
|
-
`
|
|
1016
|
+
[`HfHubHTTPError`]:
|
|
1016
1017
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1017
1018
|
|
|
1018
1019
|
|
|
@@ -1027,7 +1028,7 @@ class AsyncInferenceClient:
|
|
|
1027
1028
|
"""
|
|
1028
1029
|
model_id = model or self.model
|
|
1029
1030
|
provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
|
|
1030
|
-
inputs:
|
|
1031
|
+
inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
|
|
1031
1032
|
request_parameters = provider_helper.prepare_request(
|
|
1032
1033
|
inputs=inputs,
|
|
1033
1034
|
parameters={
|
|
@@ -1088,7 +1089,7 @@ class AsyncInferenceClient:
|
|
|
1088
1089
|
Raises:
|
|
1089
1090
|
[`InferenceTimeoutError`]:
|
|
1090
1091
|
If the model is unavailable or the request times out.
|
|
1091
|
-
`
|
|
1092
|
+
[`HfHubHTTPError`]:
|
|
1092
1093
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1093
1094
|
|
|
1094
1095
|
Example:
|
|
@@ -1126,9 +1127,9 @@ class AsyncInferenceClient:
|
|
|
1126
1127
|
text: str,
|
|
1127
1128
|
*,
|
|
1128
1129
|
model: Optional[str] = None,
|
|
1129
|
-
targets: Optional[
|
|
1130
|
+
targets: Optional[list[str]] = None,
|
|
1130
1131
|
top_k: Optional[int] = None,
|
|
1131
|
-
) ->
|
|
1132
|
+
) -> list[FillMaskOutputElement]:
|
|
1132
1133
|
"""
|
|
1133
1134
|
Fill in a hole with a missing word (token to be precise).
|
|
1134
1135
|
|
|
@@ -1138,20 +1139,20 @@ class AsyncInferenceClient:
|
|
|
1138
1139
|
model (`str`, *optional*):
|
|
1139
1140
|
The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
|
|
1140
1141
|
a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
|
|
1141
|
-
targets (`
|
|
1142
|
+
targets (`list[str]`, *optional*):
|
|
1142
1143
|
When passed, the model will limit the scores to the passed targets instead of looking up in the whole
|
|
1143
1144
|
vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
|
|
1144
1145
|
resulting token will be used (with a warning, and that might be slower).
|
|
1145
1146
|
top_k (`int`, *optional*):
|
|
1146
1147
|
When passed, overrides the number of predictions to return.
|
|
1147
1148
|
Returns:
|
|
1148
|
-
`
|
|
1149
|
+
`list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
|
|
1149
1150
|
probability, token reference, and completed text.
|
|
1150
1151
|
|
|
1151
1152
|
Raises:
|
|
1152
1153
|
[`InferenceTimeoutError`]:
|
|
1153
1154
|
If the model is unavailable or the request times out.
|
|
1154
|
-
`
|
|
1155
|
+
[`HfHubHTTPError`]:
|
|
1155
1156
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1156
1157
|
|
|
1157
1158
|
Example:
|
|
@@ -1185,7 +1186,7 @@ class AsyncInferenceClient:
|
|
|
1185
1186
|
model: Optional[str] = None,
|
|
1186
1187
|
function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
|
|
1187
1188
|
top_k: Optional[int] = None,
|
|
1188
|
-
) ->
|
|
1189
|
+
) -> list[ImageClassificationOutputElement]:
|
|
1189
1190
|
"""
|
|
1190
1191
|
Perform image classification on the given image using the specified model.
|
|
1191
1192
|
|
|
@@ -1200,12 +1201,12 @@ class AsyncInferenceClient:
|
|
|
1200
1201
|
top_k (`int`, *optional*):
|
|
1201
1202
|
When specified, limits the output to the top K most probable classes.
|
|
1202
1203
|
Returns:
|
|
1203
|
-
`
|
|
1204
|
+
`list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
|
|
1204
1205
|
|
|
1205
1206
|
Raises:
|
|
1206
1207
|
[`InferenceTimeoutError`]:
|
|
1207
1208
|
If the model is unavailable or the request times out.
|
|
1208
|
-
`
|
|
1209
|
+
[`HfHubHTTPError`]:
|
|
1209
1210
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1210
1211
|
|
|
1211
1212
|
Example:
|
|
@@ -1238,7 +1239,7 @@ class AsyncInferenceClient:
|
|
|
1238
1239
|
overlap_mask_area_threshold: Optional[float] = None,
|
|
1239
1240
|
subtask: Optional["ImageSegmentationSubtask"] = None,
|
|
1240
1241
|
threshold: Optional[float] = None,
|
|
1241
|
-
) ->
|
|
1242
|
+
) -> list[ImageSegmentationOutputElement]:
|
|
1242
1243
|
"""
|
|
1243
1244
|
Perform image segmentation on the given image using the specified model.
|
|
1244
1245
|
|
|
@@ -1260,12 +1261,12 @@ class AsyncInferenceClient:
|
|
|
1260
1261
|
threshold (`float`, *optional*):
|
|
1261
1262
|
Probability threshold to filter out predicted masks.
|
|
1262
1263
|
Returns:
|
|
1263
|
-
`
|
|
1264
|
+
`list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
|
|
1264
1265
|
|
|
1265
1266
|
Raises:
|
|
1266
1267
|
[`InferenceTimeoutError`]:
|
|
1267
1268
|
If the model is unavailable or the request times out.
|
|
1268
|
-
`
|
|
1269
|
+
[`HfHubHTTPError`]:
|
|
1269
1270
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1270
1271
|
|
|
1271
1272
|
Example:
|
|
@@ -1341,7 +1342,7 @@ class AsyncInferenceClient:
|
|
|
1341
1342
|
Raises:
|
|
1342
1343
|
[`InferenceTimeoutError`]:
|
|
1343
1344
|
If the model is unavailable or the request times out.
|
|
1344
|
-
`
|
|
1345
|
+
[`HfHubHTTPError`]:
|
|
1345
1346
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1346
1347
|
|
|
1347
1348
|
Example:
|
|
@@ -1473,7 +1474,7 @@ class AsyncInferenceClient:
|
|
|
1473
1474
|
Raises:
|
|
1474
1475
|
[`InferenceTimeoutError`]:
|
|
1475
1476
|
If the model is unavailable or the request times out.
|
|
1476
|
-
`
|
|
1477
|
+
[`HfHubHTTPError`]:
|
|
1477
1478
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1478
1479
|
|
|
1479
1480
|
Example:
|
|
@@ -1497,12 +1498,12 @@ class AsyncInferenceClient:
|
|
|
1497
1498
|
api_key=self.token,
|
|
1498
1499
|
)
|
|
1499
1500
|
response = await self._inner_post(request_parameters)
|
|
1500
|
-
output_list:
|
|
1501
|
+
output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
|
|
1501
1502
|
return output_list[0]
|
|
1502
1503
|
|
|
1503
1504
|
async def object_detection(
|
|
1504
1505
|
self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
|
|
1505
|
-
) ->
|
|
1506
|
+
) -> list[ObjectDetectionOutputElement]:
|
|
1506
1507
|
"""
|
|
1507
1508
|
Perform object detection on the given image using the specified model.
|
|
1508
1509
|
|
|
@@ -1518,12 +1519,12 @@ class AsyncInferenceClient:
|
|
|
1518
1519
|
threshold (`float`, *optional*):
|
|
1519
1520
|
The probability necessary to make a prediction.
|
|
1520
1521
|
Returns:
|
|
1521
|
-
`
|
|
1522
|
+
`list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
|
|
1522
1523
|
|
|
1523
1524
|
Raises:
|
|
1524
1525
|
[`InferenceTimeoutError`]:
|
|
1525
1526
|
If the model is unavailable or the request times out.
|
|
1526
|
-
`
|
|
1527
|
+
[`HfHubHTTPError`]:
|
|
1527
1528
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1528
1529
|
`ValueError`:
|
|
1529
1530
|
If the request output is not a List.
|
|
@@ -1562,7 +1563,7 @@ class AsyncInferenceClient:
|
|
|
1562
1563
|
max_question_len: Optional[int] = None,
|
|
1563
1564
|
max_seq_len: Optional[int] = None,
|
|
1564
1565
|
top_k: Optional[int] = None,
|
|
1565
|
-
) -> Union[QuestionAnsweringOutputElement,
|
|
1566
|
+
) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
|
|
1566
1567
|
"""
|
|
1567
1568
|
Retrieve the answer to a question from a given text.
|
|
1568
1569
|
|
|
@@ -1594,13 +1595,13 @@ class AsyncInferenceClient:
|
|
|
1594
1595
|
topk answers if there are not enough options available within the context.
|
|
1595
1596
|
|
|
1596
1597
|
Returns:
|
|
1597
|
-
Union[`QuestionAnsweringOutputElement`,
|
|
1598
|
+
Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
|
|
1598
1599
|
When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
|
|
1599
1600
|
When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
|
|
1600
1601
|
Raises:
|
|
1601
1602
|
[`InferenceTimeoutError`]:
|
|
1602
1603
|
If the model is unavailable or the request times out.
|
|
1603
|
-
`
|
|
1604
|
+
[`HfHubHTTPError`]:
|
|
1604
1605
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1605
1606
|
|
|
1606
1607
|
Example:
|
|
@@ -1635,15 +1636,15 @@ class AsyncInferenceClient:
|
|
|
1635
1636
|
return output
|
|
1636
1637
|
|
|
1637
1638
|
async def sentence_similarity(
|
|
1638
|
-
self, sentence: str, other_sentences:
|
|
1639
|
-
) ->
|
|
1639
|
+
self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
|
|
1640
|
+
) -> list[float]:
|
|
1640
1641
|
"""
|
|
1641
1642
|
Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
|
|
1642
1643
|
|
|
1643
1644
|
Args:
|
|
1644
1645
|
sentence (`str`):
|
|
1645
1646
|
The main sentence to compare to others.
|
|
1646
|
-
other_sentences (`
|
|
1647
|
+
other_sentences (`list[str]`):
|
|
1647
1648
|
The list of sentences to compare to.
|
|
1648
1649
|
model (`str`, *optional*):
|
|
1649
1650
|
The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
|
|
@@ -1651,12 +1652,12 @@ class AsyncInferenceClient:
|
|
|
1651
1652
|
Defaults to None.
|
|
1652
1653
|
|
|
1653
1654
|
Returns:
|
|
1654
|
-
`
|
|
1655
|
+
`list[float]`: The similarity scores between the main sentence and each of the other sentences.
|
|
1655
1656
|
|
|
1656
1657
|
Raises:
|
|
1657
1658
|
[`InferenceTimeoutError`]:
|
|
1658
1659
|
If the model is unavailable or the request times out.
|
|
1659
|
-
`
|
|
1660
|
+
[`HfHubHTTPError`]:
|
|
1660
1661
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1661
1662
|
|
|
1662
1663
|
Example:
|
|
@@ -1694,7 +1695,7 @@ class AsyncInferenceClient:
|
|
|
1694
1695
|
*,
|
|
1695
1696
|
model: Optional[str] = None,
|
|
1696
1697
|
clean_up_tokenization_spaces: Optional[bool] = None,
|
|
1697
|
-
generate_parameters: Optional[
|
|
1698
|
+
generate_parameters: Optional[dict[str, Any]] = None,
|
|
1698
1699
|
truncation: Optional["SummarizationTruncationStrategy"] = None,
|
|
1699
1700
|
) -> SummarizationOutput:
|
|
1700
1701
|
"""
|
|
@@ -1708,7 +1709,7 @@ class AsyncInferenceClient:
|
|
|
1708
1709
|
Inference Endpoint. If not provided, the default recommended model for summarization will be used.
|
|
1709
1710
|
clean_up_tokenization_spaces (`bool`, *optional*):
|
|
1710
1711
|
Whether to clean up the potential extra spaces in the text output.
|
|
1711
|
-
generate_parameters (`
|
|
1712
|
+
generate_parameters (`dict[str, Any]`, *optional*):
|
|
1712
1713
|
Additional parametrization of the text generation algorithm.
|
|
1713
1714
|
truncation (`"SummarizationTruncationStrategy"`, *optional*):
|
|
1714
1715
|
The truncation strategy to use.
|
|
@@ -1718,7 +1719,7 @@ class AsyncInferenceClient:
|
|
|
1718
1719
|
Raises:
|
|
1719
1720
|
[`InferenceTimeoutError`]:
|
|
1720
1721
|
If the model is unavailable or the request times out.
|
|
1721
|
-
`
|
|
1722
|
+
[`HfHubHTTPError`]:
|
|
1722
1723
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1723
1724
|
|
|
1724
1725
|
Example:
|
|
@@ -1749,7 +1750,7 @@ class AsyncInferenceClient:
|
|
|
1749
1750
|
|
|
1750
1751
|
async def table_question_answering(
|
|
1751
1752
|
self,
|
|
1752
|
-
table:
|
|
1753
|
+
table: dict[str, Any],
|
|
1753
1754
|
query: str,
|
|
1754
1755
|
*,
|
|
1755
1756
|
model: Optional[str] = None,
|
|
@@ -1784,7 +1785,7 @@ class AsyncInferenceClient:
|
|
|
1784
1785
|
Raises:
|
|
1785
1786
|
[`InferenceTimeoutError`]:
|
|
1786
1787
|
If the model is unavailable or the request times out.
|
|
1787
|
-
`
|
|
1788
|
+
[`HfHubHTTPError`]:
|
|
1788
1789
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1789
1790
|
|
|
1790
1791
|
Example:
|
|
@@ -1810,12 +1811,12 @@ class AsyncInferenceClient:
|
|
|
1810
1811
|
response = await self._inner_post(request_parameters)
|
|
1811
1812
|
return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
|
|
1812
1813
|
|
|
1813
|
-
async def tabular_classification(self, table:
|
|
1814
|
+
async def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
|
|
1814
1815
|
"""
|
|
1815
1816
|
Classifying a target category (a group) based on a set of attributes.
|
|
1816
1817
|
|
|
1817
1818
|
Args:
|
|
1818
|
-
table (`
|
|
1819
|
+
table (`dict[str, Any]`):
|
|
1819
1820
|
Set of attributes to classify.
|
|
1820
1821
|
model (`str`, *optional*):
|
|
1821
1822
|
The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
|
|
@@ -1828,7 +1829,7 @@ class AsyncInferenceClient:
|
|
|
1828
1829
|
Raises:
|
|
1829
1830
|
[`InferenceTimeoutError`]:
|
|
1830
1831
|
If the model is unavailable or the request times out.
|
|
1831
|
-
`
|
|
1832
|
+
[`HfHubHTTPError`]:
|
|
1832
1833
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1833
1834
|
|
|
1834
1835
|
Example:
|
|
@@ -1866,12 +1867,12 @@ class AsyncInferenceClient:
|
|
|
1866
1867
|
response = await self._inner_post(request_parameters)
|
|
1867
1868
|
return _bytes_to_list(response)
|
|
1868
1869
|
|
|
1869
|
-
async def tabular_regression(self, table:
|
|
1870
|
+
async def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
|
|
1870
1871
|
"""
|
|
1871
1872
|
Predicting a numerical target value given a set of attributes/features in a table.
|
|
1872
1873
|
|
|
1873
1874
|
Args:
|
|
1874
|
-
table (`
|
|
1875
|
+
table (`dict[str, Any]`):
|
|
1875
1876
|
Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
|
|
1876
1877
|
model (`str`, *optional*):
|
|
1877
1878
|
The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
|
|
@@ -1884,7 +1885,7 @@ class AsyncInferenceClient:
|
|
|
1884
1885
|
Raises:
|
|
1885
1886
|
[`InferenceTimeoutError`]:
|
|
1886
1887
|
If the model is unavailable or the request times out.
|
|
1887
|
-
`
|
|
1888
|
+
[`HfHubHTTPError`]:
|
|
1888
1889
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1889
1890
|
|
|
1890
1891
|
Example:
|
|
@@ -1924,7 +1925,7 @@ class AsyncInferenceClient:
|
|
|
1924
1925
|
model: Optional[str] = None,
|
|
1925
1926
|
top_k: Optional[int] = None,
|
|
1926
1927
|
function_to_apply: Optional["TextClassificationOutputTransform"] = None,
|
|
1927
|
-
) ->
|
|
1928
|
+
) -> list[TextClassificationOutputElement]:
|
|
1928
1929
|
"""
|
|
1929
1930
|
Perform text classification (e.g. sentiment-analysis) on the given text.
|
|
1930
1931
|
|
|
@@ -1941,12 +1942,12 @@ class AsyncInferenceClient:
|
|
|
1941
1942
|
The function to apply to the model outputs in order to retrieve the scores.
|
|
1942
1943
|
|
|
1943
1944
|
Returns:
|
|
1944
|
-
`
|
|
1945
|
+
`list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
|
|
1945
1946
|
|
|
1946
1947
|
Raises:
|
|
1947
1948
|
[`InferenceTimeoutError`]:
|
|
1948
1949
|
If the model is unavailable or the request times out.
|
|
1949
|
-
`
|
|
1950
|
+
[`HfHubHTTPError`]:
|
|
1950
1951
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1951
1952
|
|
|
1952
1953
|
Example:
|
|
@@ -1995,8 +1996,8 @@ class AsyncInferenceClient:
|
|
|
1995
1996
|
repetition_penalty: Optional[float] = None,
|
|
1996
1997
|
return_full_text: Optional[bool] = None,
|
|
1997
1998
|
seed: Optional[int] = None,
|
|
1998
|
-
stop: Optional[
|
|
1999
|
-
stop_sequences: Optional[
|
|
1999
|
+
stop: Optional[list[str]] = None,
|
|
2000
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2000
2001
|
temperature: Optional[float] = None,
|
|
2001
2002
|
top_k: Optional[int] = None,
|
|
2002
2003
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2025,8 +2026,8 @@ class AsyncInferenceClient:
|
|
|
2025
2026
|
repetition_penalty: Optional[float] = None,
|
|
2026
2027
|
return_full_text: Optional[bool] = None,
|
|
2027
2028
|
seed: Optional[int] = None,
|
|
2028
|
-
stop: Optional[
|
|
2029
|
-
stop_sequences: Optional[
|
|
2029
|
+
stop: Optional[list[str]] = None,
|
|
2030
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2030
2031
|
temperature: Optional[float] = None,
|
|
2031
2032
|
top_k: Optional[int] = None,
|
|
2032
2033
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2055,8 +2056,8 @@ class AsyncInferenceClient:
|
|
|
2055
2056
|
repetition_penalty: Optional[float] = None,
|
|
2056
2057
|
return_full_text: Optional[bool] = None, # Manual default value
|
|
2057
2058
|
seed: Optional[int] = None,
|
|
2058
|
-
stop: Optional[
|
|
2059
|
-
stop_sequences: Optional[
|
|
2059
|
+
stop: Optional[list[str]] = None,
|
|
2060
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2060
2061
|
temperature: Optional[float] = None,
|
|
2061
2062
|
top_k: Optional[int] = None,
|
|
2062
2063
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2085,8 +2086,8 @@ class AsyncInferenceClient:
|
|
|
2085
2086
|
repetition_penalty: Optional[float] = None,
|
|
2086
2087
|
return_full_text: Optional[bool] = None,
|
|
2087
2088
|
seed: Optional[int] = None,
|
|
2088
|
-
stop: Optional[
|
|
2089
|
-
stop_sequences: Optional[
|
|
2089
|
+
stop: Optional[list[str]] = None,
|
|
2090
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2090
2091
|
temperature: Optional[float] = None,
|
|
2091
2092
|
top_k: Optional[int] = None,
|
|
2092
2093
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2115,8 +2116,8 @@ class AsyncInferenceClient:
|
|
|
2115
2116
|
repetition_penalty: Optional[float] = None,
|
|
2116
2117
|
return_full_text: Optional[bool] = None,
|
|
2117
2118
|
seed: Optional[int] = None,
|
|
2118
|
-
stop: Optional[
|
|
2119
|
-
stop_sequences: Optional[
|
|
2119
|
+
stop: Optional[list[str]] = None,
|
|
2120
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2120
2121
|
temperature: Optional[float] = None,
|
|
2121
2122
|
top_k: Optional[int] = None,
|
|
2122
2123
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2144,8 +2145,8 @@ class AsyncInferenceClient:
|
|
|
2144
2145
|
repetition_penalty: Optional[float] = None,
|
|
2145
2146
|
return_full_text: Optional[bool] = None,
|
|
2146
2147
|
seed: Optional[int] = None,
|
|
2147
|
-
stop: Optional[
|
|
2148
|
-
stop_sequences: Optional[
|
|
2148
|
+
stop: Optional[list[str]] = None,
|
|
2149
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2149
2150
|
temperature: Optional[float] = None,
|
|
2150
2151
|
top_k: Optional[int] = None,
|
|
2151
2152
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2198,9 +2199,9 @@ class AsyncInferenceClient:
|
|
|
2198
2199
|
Whether to prepend the prompt to the generated text
|
|
2199
2200
|
seed (`int`, *optional*):
|
|
2200
2201
|
Random sampling seed
|
|
2201
|
-
stop (`
|
|
2202
|
+
stop (`list[str]`, *optional*):
|
|
2202
2203
|
Stop generating tokens if a member of `stop` is generated.
|
|
2203
|
-
stop_sequences (`
|
|
2204
|
+
stop_sequences (`list[str]`, *optional*):
|
|
2204
2205
|
Deprecated argument. Use `stop` instead.
|
|
2205
2206
|
temperature (`float`, *optional*):
|
|
2206
2207
|
The value used to modulate the logits distribution.
|
|
@@ -2221,10 +2222,10 @@ class AsyncInferenceClient:
|
|
|
2221
2222
|
Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
|
|
2222
2223
|
|
|
2223
2224
|
Returns:
|
|
2224
|
-
`Union[str, TextGenerationOutput,
|
|
2225
|
+
`Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
|
|
2225
2226
|
Generated text returned from the server:
|
|
2226
2227
|
- if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
|
|
2227
|
-
- if `stream=True` and `details=False`, the generated text is returned token by token as a `
|
|
2228
|
+
- if `stream=True` and `details=False`, the generated text is returned token by token as an `AsyncIterable[str]`
|
|
2228
2229
|
- if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
|
|
2229
2230
|
- if `details=True` and `stream=True`, the generated text is returned token by token as an iterable of [`~huggingface_hub.TextGenerationStreamOutput`]
|
|
2230
2231
|
|
|
@@ -2233,7 +2234,7 @@ class AsyncInferenceClient:
|
|
|
2233
2234
|
If input values are not valid. No HTTP call is made to the server.
|
|
2234
2235
|
[`InferenceTimeoutError`]:
|
|
2235
2236
|
If the model is unavailable or the request times out.
|
|
2236
|
-
`
|
|
2237
|
+
[`HfHubHTTPError`]:
|
|
2237
2238
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2238
2239
|
|
|
2239
2240
|
Example:
|
|
@@ -2423,9 +2424,9 @@ class AsyncInferenceClient:
|
|
|
2423
2424
|
# Handle errors separately for more precise error messages
|
|
2424
2425
|
try:
|
|
2425
2426
|
bytes_output = await self._inner_post(request_parameters, stream=stream or False)
|
|
2426
|
-
except
|
|
2427
|
-
match = MODEL_KWARGS_NOT_USED_REGEX.search(e
|
|
2428
|
-
if e
|
|
2427
|
+
except HfHubHTTPError as e:
|
|
2428
|
+
match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
|
|
2429
|
+
if isinstance(e, BadRequestError) and match:
|
|
2429
2430
|
unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
|
|
2430
2431
|
_set_unsupported_text_generation_kwargs(model, unused_params)
|
|
2431
2432
|
return await self.text_generation( # type: ignore
|
|
@@ -2478,7 +2479,7 @@ class AsyncInferenceClient:
|
|
|
2478
2479
|
model: Optional[str] = None,
|
|
2479
2480
|
scheduler: Optional[str] = None,
|
|
2480
2481
|
seed: Optional[int] = None,
|
|
2481
|
-
extra_body: Optional[
|
|
2482
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2482
2483
|
) -> "Image":
|
|
2483
2484
|
"""
|
|
2484
2485
|
Generate an image based on a given text using a specified model.
|
|
@@ -2512,7 +2513,7 @@ class AsyncInferenceClient:
|
|
|
2512
2513
|
Override the scheduler with a compatible one.
|
|
2513
2514
|
seed (`int`, *optional*):
|
|
2514
2515
|
Seed for the random number generator.
|
|
2515
|
-
extra_body (`
|
|
2516
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2516
2517
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2517
2518
|
for supported parameters.
|
|
2518
2519
|
|
|
@@ -2522,7 +2523,7 @@ class AsyncInferenceClient:
|
|
|
2522
2523
|
Raises:
|
|
2523
2524
|
[`InferenceTimeoutError`]:
|
|
2524
2525
|
If the model is unavailable or the request times out.
|
|
2525
|
-
`
|
|
2526
|
+
[`HfHubHTTPError`]:
|
|
2526
2527
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2527
2528
|
|
|
2528
2529
|
Example:
|
|
@@ -2612,11 +2613,11 @@ class AsyncInferenceClient:
|
|
|
2612
2613
|
*,
|
|
2613
2614
|
model: Optional[str] = None,
|
|
2614
2615
|
guidance_scale: Optional[float] = None,
|
|
2615
|
-
negative_prompt: Optional[
|
|
2616
|
+
negative_prompt: Optional[list[str]] = None,
|
|
2616
2617
|
num_frames: Optional[float] = None,
|
|
2617
2618
|
num_inference_steps: Optional[int] = None,
|
|
2618
2619
|
seed: Optional[int] = None,
|
|
2619
|
-
extra_body: Optional[
|
|
2620
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2620
2621
|
) -> bytes:
|
|
2621
2622
|
"""
|
|
2622
2623
|
Generate a video based on a given text.
|
|
@@ -2634,7 +2635,7 @@ class AsyncInferenceClient:
|
|
|
2634
2635
|
guidance_scale (`float`, *optional*):
|
|
2635
2636
|
A higher guidance scale value encourages the model to generate videos closely linked to the text
|
|
2636
2637
|
prompt, but values too high may cause saturation and other artifacts.
|
|
2637
|
-
negative_prompt (`
|
|
2638
|
+
negative_prompt (`list[str]`, *optional*):
|
|
2638
2639
|
One or several prompts to guide what NOT to include in video generation.
|
|
2639
2640
|
num_frames (`float`, *optional*):
|
|
2640
2641
|
The num_frames parameter determines how many video frames are generated.
|
|
@@ -2643,7 +2644,7 @@ class AsyncInferenceClient:
|
|
|
2643
2644
|
expense of slower inference.
|
|
2644
2645
|
seed (`int`, *optional*):
|
|
2645
2646
|
Seed for the random number generator.
|
|
2646
|
-
extra_body (`
|
|
2647
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2647
2648
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2648
2649
|
for supported parameters.
|
|
2649
2650
|
|
|
@@ -2723,7 +2724,7 @@ class AsyncInferenceClient:
|
|
|
2723
2724
|
top_p: Optional[float] = None,
|
|
2724
2725
|
typical_p: Optional[float] = None,
|
|
2725
2726
|
use_cache: Optional[bool] = None,
|
|
2726
|
-
extra_body: Optional[
|
|
2727
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2727
2728
|
) -> bytes:
|
|
2728
2729
|
"""
|
|
2729
2730
|
Synthesize an audio of a voice pronouncing a given text.
|
|
@@ -2784,7 +2785,7 @@ class AsyncInferenceClient:
|
|
|
2784
2785
|
paper](https://hf.co/papers/2202.00666) for more details.
|
|
2785
2786
|
use_cache (`bool`, *optional*):
|
|
2786
2787
|
Whether the model should use the past last key/values attentions to speed up decoding
|
|
2787
|
-
extra_body (`
|
|
2788
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2788
2789
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2789
2790
|
for supported parameters.
|
|
2790
2791
|
Returns:
|
|
@@ -2793,7 +2794,7 @@ class AsyncInferenceClient:
|
|
|
2793
2794
|
Raises:
|
|
2794
2795
|
[`InferenceTimeoutError`]:
|
|
2795
2796
|
If the model is unavailable or the request times out.
|
|
2796
|
-
`
|
|
2797
|
+
[`HfHubHTTPError`]:
|
|
2797
2798
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2798
2799
|
|
|
2799
2800
|
Example:
|
|
@@ -2917,9 +2918,9 @@ class AsyncInferenceClient:
|
|
|
2917
2918
|
*,
|
|
2918
2919
|
model: Optional[str] = None,
|
|
2919
2920
|
aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
|
|
2920
|
-
ignore_labels: Optional[
|
|
2921
|
+
ignore_labels: Optional[list[str]] = None,
|
|
2921
2922
|
stride: Optional[int] = None,
|
|
2922
|
-
) ->
|
|
2923
|
+
) -> list[TokenClassificationOutputElement]:
|
|
2923
2924
|
"""
|
|
2924
2925
|
Perform token classification on the given text.
|
|
2925
2926
|
Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
|
|
@@ -2933,18 +2934,18 @@ class AsyncInferenceClient:
|
|
|
2933
2934
|
Defaults to None.
|
|
2934
2935
|
aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
|
|
2935
2936
|
The strategy used to fuse tokens based on model predictions
|
|
2936
|
-
ignore_labels (`
|
|
2937
|
+
ignore_labels (`list[str]`, *optional*):
|
|
2937
2938
|
A list of labels to ignore
|
|
2938
2939
|
stride (`int`, *optional*):
|
|
2939
2940
|
The number of overlapping tokens between chunks when splitting the input text.
|
|
2940
2941
|
|
|
2941
2942
|
Returns:
|
|
2942
|
-
`
|
|
2943
|
+
`list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
|
|
2943
2944
|
|
|
2944
2945
|
Raises:
|
|
2945
2946
|
[`InferenceTimeoutError`]:
|
|
2946
2947
|
If the model is unavailable or the request times out.
|
|
2947
|
-
`
|
|
2948
|
+
[`HfHubHTTPError`]:
|
|
2948
2949
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2949
2950
|
|
|
2950
2951
|
Example:
|
|
@@ -2996,7 +2997,7 @@ class AsyncInferenceClient:
|
|
|
2996
2997
|
tgt_lang: Optional[str] = None,
|
|
2997
2998
|
clean_up_tokenization_spaces: Optional[bool] = None,
|
|
2998
2999
|
truncation: Optional["TranslationTruncationStrategy"] = None,
|
|
2999
|
-
generate_parameters: Optional[
|
|
3000
|
+
generate_parameters: Optional[dict[str, Any]] = None,
|
|
3000
3001
|
) -> TranslationOutput:
|
|
3001
3002
|
"""
|
|
3002
3003
|
Convert text from one language to another.
|
|
@@ -3021,7 +3022,7 @@ class AsyncInferenceClient:
|
|
|
3021
3022
|
Whether to clean up the potential extra spaces in the text output.
|
|
3022
3023
|
truncation (`"TranslationTruncationStrategy"`, *optional*):
|
|
3023
3024
|
The truncation strategy to use.
|
|
3024
|
-
generate_parameters (`
|
|
3025
|
+
generate_parameters (`dict[str, Any]`, *optional*):
|
|
3025
3026
|
Additional parametrization of the text generation algorithm.
|
|
3026
3027
|
|
|
3027
3028
|
Returns:
|
|
@@ -3030,7 +3031,7 @@ class AsyncInferenceClient:
|
|
|
3030
3031
|
Raises:
|
|
3031
3032
|
[`InferenceTimeoutError`]:
|
|
3032
3033
|
If the model is unavailable or the request times out.
|
|
3033
|
-
`
|
|
3034
|
+
[`HfHubHTTPError`]:
|
|
3034
3035
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
3035
3036
|
`ValueError`:
|
|
3036
3037
|
If only one of the `src_lang` and `tgt_lang` arguments are provided.
|
|
@@ -3084,7 +3085,7 @@ class AsyncInferenceClient:
|
|
|
3084
3085
|
*,
|
|
3085
3086
|
model: Optional[str] = None,
|
|
3086
3087
|
top_k: Optional[int] = None,
|
|
3087
|
-
) ->
|
|
3088
|
+
) -> list[VisualQuestionAnsweringOutputElement]:
|
|
3088
3089
|
"""
|
|
3089
3090
|
Answering open-ended questions based on an image.
|
|
3090
3091
|
|
|
@@ -3101,12 +3102,12 @@ class AsyncInferenceClient:
|
|
|
3101
3102
|
The number of answers to return (will be chosen by order of likelihood). Note that we return less than
|
|
3102
3103
|
topk answers if there are not enough options available within the context.
|
|
3103
3104
|
Returns:
|
|
3104
|
-
`
|
|
3105
|
+
`list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
|
|
3105
3106
|
|
|
3106
3107
|
Raises:
|
|
3107
3108
|
[`InferenceTimeoutError`]:
|
|
3108
3109
|
If the model is unavailable or the request times out.
|
|
3109
|
-
`
|
|
3110
|
+
[`HfHubHTTPError`]:
|
|
3110
3111
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
3111
3112
|
|
|
3112
3113
|
Example:
|
|
@@ -3140,21 +3141,21 @@ class AsyncInferenceClient:
|
|
|
3140
3141
|
async def zero_shot_classification(
|
|
3141
3142
|
self,
|
|
3142
3143
|
text: str,
|
|
3143
|
-
candidate_labels:
|
|
3144
|
+
candidate_labels: list[str],
|
|
3144
3145
|
*,
|
|
3145
3146
|
multi_label: Optional[bool] = False,
|
|
3146
3147
|
hypothesis_template: Optional[str] = None,
|
|
3147
3148
|
model: Optional[str] = None,
|
|
3148
|
-
) ->
|
|
3149
|
+
) -> list[ZeroShotClassificationOutputElement]:
|
|
3149
3150
|
"""
|
|
3150
3151
|
Provide as input a text and a set of candidate labels to classify the input text.
|
|
3151
3152
|
|
|
3152
3153
|
Args:
|
|
3153
3154
|
text (`str`):
|
|
3154
3155
|
The input text to classify.
|
|
3155
|
-
candidate_labels (`
|
|
3156
|
+
candidate_labels (`list[str]`):
|
|
3156
3157
|
The set of possible class labels to classify the text into.
|
|
3157
|
-
labels (`
|
|
3158
|
+
labels (`list[str]`, *optional*):
|
|
3158
3159
|
(deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
|
|
3159
3160
|
multi_label (`bool`, *optional*):
|
|
3160
3161
|
Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
|
|
@@ -3169,12 +3170,12 @@ class AsyncInferenceClient:
|
|
|
3169
3170
|
|
|
3170
3171
|
|
|
3171
3172
|
Returns:
|
|
3172
|
-
`
|
|
3173
|
+
`list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
|
|
3173
3174
|
|
|
3174
3175
|
Raises:
|
|
3175
3176
|
[`InferenceTimeoutError`]:
|
|
3176
3177
|
If the model is unavailable or the request times out.
|
|
3177
|
-
`
|
|
3178
|
+
[`HfHubHTTPError`]:
|
|
3178
3179
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
3179
3180
|
|
|
3180
3181
|
Example with `multi_label=False`:
|
|
@@ -3248,22 +3249,22 @@ class AsyncInferenceClient:
|
|
|
3248
3249
|
async def zero_shot_image_classification(
|
|
3249
3250
|
self,
|
|
3250
3251
|
image: ContentT,
|
|
3251
|
-
candidate_labels: List[str],
|
|
3252
|
+
candidate_labels: list[str],
|
|
3252
3253
|
*,
|
|
3253
3254
|
model: Optional[str] = None,
|
|
3254
3255
|
hypothesis_template: Optional[str] = None,
|
|
3255
3256
|
# deprecated argument
|
|
3256
|
-
labels: List[str] = None, # type: ignore
|
|
3257
|
-
) -> List[ZeroShotImageClassificationOutputElement]:
|
|
3257
|
+
labels: list[str] = None, # type: ignore
|
|
3258
|
+
) -> list[ZeroShotImageClassificationOutputElement]:
|
|
3258
3259
|
"""
|
|
3259
3260
|
Provide input image and text labels to predict text labels for the image.
|
|
3260
3261
|
|
|
3261
3262
|
Args:
|
|
3262
3263
|
image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
|
|
3263
3264
|
The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
|
|
3264
|
-
candidate_labels (`List[str]`):
|
|
3265
|
+
candidate_labels (`list[str]`):
|
|
3265
3266
|
The candidate labels for this image
|
|
3266
|
-
labels (`List[str]`, *optional*):
|
|
3267
|
+
labels (`list[str]`, *optional*):
|
|
3267
3268
|
(deprecated) List of string possible labels. There must be at least 2 labels.
|
|
3268
3269
|
model (`str`, *optional*):
|
|
3269
3270
|
The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
|
|
@@ -3273,12 +3274,12 @@ class AsyncInferenceClient:
|
|
|
3273
3274
|
replacing the placeholder with the candidate labels.
|
|
3274
3275
|
|
|
3275
3276
|
Returns:
|
|
3276
|
-
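`List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.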
|
|
3277
|
+
`list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
|
|
3277
3278
|
|
|
3278
3279
|
Raises:
|
|
3279
3280
|
[`InferenceTimeoutError`]:
|
|
3280
3281
|
If the model is unavailable or the request times out.
|
|
3281
|
-
`
|
|
3282
|
+
[`HfHubHTTPError`]:
|
|
3282
3283
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
3283
3284
|
|
|
3284
3285
|
Example:
|
|
@@ -3313,48 +3314,7 @@ class AsyncInferenceClient:
|
|
|
3313
3314
|
response = await self._inner_post(request_parameters)
|
|
3314
3315
|
return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)
|
|
3315
3316
|
|
|
3316
|
-
def
|
|
3317
|
-
aiohttp = _import_aiohttp()
|
|
3318
|
-
client_headers = self.headers.copy()
|
|
3319
|
-
if headers is not None:
|
|
3320
|
-
client_headers.update(headers)
|
|
3321
|
-
|
|
3322
|
-
# Return a new aiohttp ClientSession with correct settings.
|
|
3323
|
-
session = aiohttp.ClientSession(
|
|
3324
|
-
headers=client_headers,
|
|
3325
|
-
cookies=self.cookies,
|
|
3326
|
-
timeout=aiohttp.ClientTimeout(self.timeout),
|
|
3327
|
-
trust_env=self.trust_env,
|
|
3328
|
-
)
|
|
3329
|
-
|
|
3330
|
-
# Keep track of sessions to close them later
|
|
3331
|
-
self._sessions[session] = set()
|
|
3332
|
-
|
|
3333
|
-
# Override the `._request` method to register responses to be closed
|
|
3334
|
-
session._wrapped_request = session._request
|
|
3335
|
-
|
|
3336
|
-
async def _request(method, url, **kwargs):
|
|
3337
|
-
response = await session._wrapped_request(method, url, **kwargs)
|
|
3338
|
-
self._sessions[session].add(response)
|
|
3339
|
-
return response
|
|
3340
|
-
|
|
3341
|
-
session._request = _request
|
|
3342
|
-
|
|
3343
|
-
# Override the 'close' method to
|
|
3344
|
-
# 1. close ongoing responses
|
|
3345
|
-
# 2. deregister the session when closed
|
|
3346
|
-
session._close = session.close
|
|
3347
|
-
|
|
3348
|
-
async def close_session():
|
|
3349
|
-
for response in self._sessions[session]:
|
|
3350
|
-
response.close()
|
|
3351
|
-
await session._close()
|
|
3352
|
-
self._sessions.pop(session, None)
|
|
3353
|
-
|
|
3354
|
-
session.close = close_session
|
|
3355
|
-
return session
|
|
3356
|
-
|
|
3357
|
-
async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
|
|
3317
|
+
async def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
|
|
3358
3318
|
"""
|
|
3359
3319
|
Get information about the deployed endpoint.
|
|
3360
3320
|
|
|
@@ -3367,7 +3327,7 @@ class AsyncInferenceClient:
|
|
|
3367
3327
|
Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
|
|
3368
3328
|
|
|
3369
3329
|
Returns:
|
|
3370
|
-
`Dict[str, Any]`: Information about the endpoint.
|
|
3330
|
+
`dict[str, Any]`: Information about the endpoint.
|
|
3371
3331
|
|
|
3372
3332
|
Example:
|
|
3373
3333
|
```py
|
|
@@ -3409,10 +3369,10 @@ class AsyncInferenceClient:
|
|
|
3409
3369
|
else:
|
|
3410
3370
|
url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"
|
|
3411
3371
|
|
|
3412
|
-
|
|
3413
|
-
|
|
3414
|
-
|
|
3415
|
-
|
|
3372
|
+
client = await self._get_async_client()
|
|
3373
|
+
response = await client.get(url, headers=build_hf_headers(token=self.token))
|
|
3374
|
+
hf_raise_for_status(response)
|
|
3375
|
+
return response.json()
|
|
3416
3376
|
|
|
3417
3377
|
async def health_check(self, model: Optional[str] = None) -> bool:
|
|
3418
3378
|
"""
|
|
@@ -3446,9 +3406,9 @@ class AsyncInferenceClient:
|
|
|
3446
3406
|
raise ValueError("Model must be an Inference Endpoint URL.")
|
|
3447
3407
|
url = model.rstrip("/") + "/health"
|
|
3448
3408
|
|
|
3449
|
-
|
|
3450
|
-
|
|
3451
|
-
|
|
3409
|
+
client = await self._get_async_client()
|
|
3410
|
+
response = await client.get(url, headers=build_hf_headers(token=self.token))
|
|
3411
|
+
return response.status_code == 200
|
|
3452
3412
|
|
|
3453
3413
|
@property
|
|
3454
3414
|
def chat(self) -> "ProxyClientChat":
|