huggingface-hub 0.35.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +28 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +15 -15
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +80 -3
- huggingface_hub/cli/auth.py +104 -150
- huggingface_hub/cli/cache.py +102 -126
- huggingface_hub/cli/download.py +93 -110
- huggingface_hub/cli/hf.py +37 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +158 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -212
- huggingface_hub/cli/upload_large_folder.py +90 -105
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +11 -11
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -22
- huggingface_hub/errors.py +43 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +154 -253
- huggingface_hub/hf_api.py +329 -558
- huggingface_hub/hf_file_system.py +104 -62
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +178 -163
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +219 -259
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +2 -13
- huggingface_hub/inference/_providers/_common.py +24 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +24 -33
- huggingface_hub/repocard.py +16 -17
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +369 -209
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +15 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/METADATA +17 -26
- huggingface_hub-1.0.0rc1.dist-info/RECORD +161 -0
- huggingface_hub/inference/_providers/publicai.py +0 -6
- huggingface_hub/inference/_providers/scaleway.py +0 -28
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.1.dist-info/RECORD +0 -168
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_generated/_async_client.py

@@ -21,12 +21,16 @@
 import asyncio
 import base64
 import logging
+import os
 import re
 import warnings
-from
+from contextlib import AsyncExitStack
+from typing import TYPE_CHECKING, Any, AsyncIterable, Literal, Optional, Union, overload
+
+import httpx

 from huggingface_hub import constants
-from huggingface_hub.errors import InferenceTimeoutError
+from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
@@ -86,15 +90,19 @@ from huggingface_hub.inference._generated.types import (
     ZeroShotImageClassificationOutputElement,
 )
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
-from huggingface_hub.utils import
+from huggingface_hub.utils import (
+    build_hf_headers,
+    get_async_session,
+    hf_raise_for_status,
+    validate_hf_hub_args,
+)
 from huggingface_hub.utils._auth import get_token

-from .._common import _async_yield_from
+from .._common import _async_yield_from


 if TYPE_CHECKING:
     import numpy as np
-    from aiohttp import ClientResponse, ClientSession
     from PIL.Image import Image

 logger = logging.getLogger(__name__)
@@ -118,7 +126,7 @@ class AsyncInferenceClient:
             Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
             arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"replicate"`, "sambanova"` or `"together"`.
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str`, *optional*):
@@ -127,18 +135,14 @@ class AsyncInferenceClient:
             arguments are mutually exclusive and have the exact same behavior.
         timeout (`float`, `optional`):
             The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-        headers (`
+        headers (`dict[str, str]`, `optional`):
             Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
             Values in this dictionary will override the default values.
         bill_to (`str`, `optional`):
             The billing account to use for the requests. By default the requests are billed on the user's account.
             Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-        cookies (`
+        cookies (`dict[str, str]`, `optional`):
             Additional cookies to send to the server.
-        trust_env ('bool', 'optional'):
-            Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
-        proxies (`Any`, `optional`):
-            Proxies to use for the request.
         base_url (`str`, `optional`):
             Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -147,6 +151,7 @@ class AsyncInferenceClient:
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
     """

+    @validate_hf_hub_args
     def __init__(
         self,
         model: Optional[str] = None,
@@ -154,10 +159,8 @@ class AsyncInferenceClient:
         provider: Optional[PROVIDER_OR_POLICY_T] = None,
         token: Optional[str] = None,
         timeout: Optional[float] = None,
-        headers: Optional[
-        cookies: Optional[
-        trust_env: bool = False,
-        proxies: Optional[Any] = None,
+        headers: Optional[dict[str, str]] = None,
+        cookies: Optional[dict[str, str]] = None,
         bill_to: Optional[str] = None,
         # OpenAI compatibility
         base_url: Optional[str] = None,
@@ -219,15 +222,36 @@ class AsyncInferenceClient:

         self.cookies = cookies
         self.timeout = timeout
-        self.trust_env = trust_env
-        self.proxies = proxies

-
-        self.
+        self.exit_stack = AsyncExitStack()
+        self._async_client: Optional[httpx.AsyncClient] = None

     def __repr__(self):
         return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        await self.close()
+
+    async def close(self):
+        """Close the client.
+
+        This method is automatically called when using the client as a context manager.
+        """
+        await self.exit_stack.aclose()
+
+    async def _get_async_client(self):
+        """Get a unique async client for this AsyncInferenceClient instance.
+
+        Returns the same client instance on subsequent calls, ensuring proper
+        connection reuse and resource management through the exit stack.
+        """
+        if self._async_client is None:
+            self._async_client = await self.exit_stack.enter_async_context(get_async_session())
+        return self._async_client
+
     @overload
     async def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
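Note: the hunk above replaces the old per-request aiohttp sessions with a single httpx.AsyncClient held open through an AsyncExitStack, so the client is now meant to be closed explicitly or used as an async context manager. A minimal usage sketch under that assumption (model selection, inputs, and printed values are illustrative, not taken from the diff):

    import asyncio
    from huggingface_hub import AsyncInferenceClient

    async def main() -> None:
        # Preferred: __aexit__ awaits close(), which unwinds the exit stack.
        async with AsyncInferenceClient() as client:
            scores = await client.sentence_similarity(
                "A new major release", ["Version 1.0.0 is out", "The weather is nice"]
            )
            print(scores)

        # Equivalent manual lifecycle if a context manager is not convenient.
        client = AsyncInferenceClient()
        try:
            print(await client.sentence_similarity("hello", ["hi", "bye"]))
        finally:
            await client.close()

    asyncio.run(main())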
@@ -236,83 +260,60 @@ class AsyncInferenceClient:
     @overload
     async def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-    ) -> AsyncIterable[
+    ) -> AsyncIterable[str]: ...

     @overload
     async def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, AsyncIterable[
+    ) -> Union[bytes, AsyncIterable[str]]: ...

     async def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, AsyncIterable[
+    ) -> Union[bytes, AsyncIterable[str]]:
         """Make a request to the inference server."""

-        aiohttp = _import_aiohttp()
-
         # TODO: this should be handled in provider helpers directly
         if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
             request_parameters.headers["Accept"] = "image/png"

-        # Do not use context manager as we don't want to close the connection immediately when returning
-        # a stream
-        session = self._get_client_session(headers=request_parameters.headers)
-
         try:
-
-                request_parameters.url, json=request_parameters.json, data=request_parameters.data, proxy=self.proxies
-            )
-            response_error_payload = None
-            if response.status != 200:
-                try:
-                    response_error_payload = await response.json()  # get payload before connection closed
-                except Exception:
-                    pass
-            response.raise_for_status()
+            client = await self._get_async_client()
             if stream:
-
+                response = await self.exit_stack.enter_async_context(
+                    client.stream(
+                        "POST",
+                        request_parameters.url,
+                        json=request_parameters.json,
+                        data=request_parameters.data,
+                        headers=request_parameters.headers,
+                        cookies=self.cookies,
+                        timeout=self.timeout,
+                    )
+                )
+                hf_raise_for_status(response)
+                return _async_yield_from(client, response)
             else:
-
-
-
+                response = await client.post(
+                    request_parameters.url,
+                    json=request_parameters.json,
+                    data=request_parameters.data,
+                    headers=request_parameters.headers,
+                    cookies=self.cookies,
+                    timeout=self.timeout,
+                )
+                hf_raise_for_status(response)
+                return response.content
         except asyncio.TimeoutError as error:
-            await session.close()
             # Convert any `TimeoutError` to a `InferenceTimeoutError`
             raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
-        except
-            error.
-
-
-
-
+        except HfHubHTTPError as error:
+            if error.response.status_code == 422 and request_parameters.task != "unknown":
+                msg = str(error.args[0])
+                if len(error.response.text) > 0:
+                    msg += f"{os.linesep}{error.response.text}{os.linesep}"
+                error.args = (msg,) + error.args[1:]
             raise

-    async def __aenter__(self):
-        return self
-
-    async def __aexit__(self, exc_type, exc_value, traceback):
-        await self.close()
-
-    def __del__(self):
-        if len(self._sessions) > 0:
-            warnings.warn(
-                "Deleting 'AsyncInferenceClient' client but some sessions are still open. "
-                "This can happen if you've stopped streaming data from the server before the stream was complete. "
-                "To close the client properly, you must call `await client.close()` "
-                "or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
-            )
-
-    async def close(self):
-        """Close all open sessions.
-
-        By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
-        are streaming data from the server and you stop before the stream is complete, you must call this method to
-        close the session properly.
-
-        Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
-        """
-        await asyncio.gather(*[session.close() for session in self._sessions.keys()])
-
     async def audio_classification(
         self,
         audio: ContentT,
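Note: with the rewrite above, _inner_post raises through hf_raise_for_status, so callers see huggingface_hub.errors.HfHubHTTPError (and subclasses such as BadRequestError) rather than aiohttp exceptions, while timeouts still surface as InferenceTimeoutError. A hedged sketch of the caller-side handling this implies (task and inputs are illustrative):

    import asyncio
    from huggingface_hub import AsyncInferenceClient
    from huggingface_hub.errors import HfHubHTTPError, InferenceTimeoutError

    async def classify(text: str):
        async with AsyncInferenceClient(timeout=30) as client:
            try:
                return await client.text_classification(text)
            except InferenceTimeoutError:
                # Model unavailable or the request timed out.
                return None
            except HfHubHTTPError as err:
                # Any other non-2xx response is raised by hf_raise_for_status.
                print(f"Inference failed: {err}")
                return None

    asyncio.run(classify("great release!"))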
@@ -320,7 +321,7 @@ class AsyncInferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-    ) ->
+    ) -> list[AudioClassificationOutputElement]:
         """
         Perform audio classification on the provided audio content.

@@ -338,12 +339,12 @@ class AsyncInferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.

         Returns:
-            `
+            `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -376,7 +377,7 @@ class AsyncInferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-    ) ->
+    ) -> list[AudioToAudioOutputElement]:
         """
         Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).

@@ -390,12 +391,12 @@ class AsyncInferenceClient:
                 audio_to_audio will be used.

         Returns:
-            `
+            `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.

         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -429,7 +430,7 @@ class AsyncInferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -440,7 +441,7 @@ class AsyncInferenceClient:
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-            extra_body (`
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -449,7 +450,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -476,105 +477,105 @@ class AsyncInferenceClient:
     @overload
     async def chat_completion(  # type: ignore
         self,
-        messages:
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[False] = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> ChatCompletionOutput: ...

     @overload
     async def chat_completion(  # type: ignore
         self,
-        messages:
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[True] = True,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> AsyncIterable[ChatCompletionStreamOutput]: ...

     @overload
     async def chat_completion(
         self,
-        messages:
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...

     async def chat_completion(
         self,
-        messages:
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         # Parameters from ChatCompletionInput (handled manually)
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.
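Note: the overloads above only modernize the type annotations (built-in list/dict generics); the calling convention is unchanged, with stream=False returning a ChatCompletionOutput and stream=True returning an AsyncIterable[ChatCompletionStreamOutput]. An illustrative sketch (model selection is left to the configured provider; prompts are made up):

    import asyncio
    from huggingface_hub import AsyncInferenceClient

    async def main() -> None:
        async with AsyncInferenceClient() as client:
            # Non-streaming: a single ChatCompletionOutput.
            output = await client.chat_completion(
                messages=[{"role": "user", "content": "Say hello in one word."}],
                max_tokens=16,
            )
            print(output.choices[0].message.content)

            # Streaming: an AsyncIterable[ChatCompletionStreamOutput].
            async for chunk in await client.chat_completion(
                messages=[{"role": "user", "content": "Count to three."}],
                stream=True,
                max_tokens=32,
            ):
                print(chunk.choices[0].delta.content or "", end="")

    asyncio.run(main())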
@@ -604,7 +605,7 @@ class AsyncInferenceClient:
             frequency_penalty (`float`, *optional*):
                 Penalizes new tokens based on their existing frequency
                 in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-            logit_bias (`
+            logit_bias (`list[float]`, *optional*):
                 Adjusts the likelihood of specific tokens appearing in the generated output.
             logprobs (`bool`, *optional*):
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -620,7 +621,7 @@ class AsyncInferenceClient:
                 Grammar constraints. Can be either a JSONSchema or a regex.
             seed (Optional[`int`], *optional*):
                 Seed for reproducible control flow. Defaults to None.
-            stop (`
+            stop (`list[str]`, *optional*):
                 Up to four strings which trigger the end of the response.
                 Defaults to None.
             stream (`bool`, *optional*):
@@ -644,7 +645,7 @@ class AsyncInferenceClient:
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-            extra_body (`
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -656,7 +657,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -976,8 +977,8 @@ class AsyncInferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-        word_boxes: Optional[
-    ) ->
+        word_boxes: Optional[list[Union[list[float], str]]] = None,
+    ) -> list[DocumentQuestionAnsweringOutputElement]:
         """
         Answer questions on document images.

@@ -1007,16 +1008,16 @@ class AsyncInferenceClient:
             top_k (`int`, *optional*):
                 The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                 answers if there are not enough options available within the context.
-            word_boxes (`
+            word_boxes (`list[Union[list[float], str`, *optional*):
                 A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                 step and use the provided bounding boxes instead.
         Returns:
-            `
+            `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.


@@ -1031,7 +1032,7 @@ class AsyncInferenceClient:
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-        inputs:
+        inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
         request_parameters = provider_helper.prepare_request(
             inputs=inputs,
             parameters={
@@ -1092,7 +1093,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1130,9 +1131,9 @@ class AsyncInferenceClient:
         text: str,
         *,
         model: Optional[str] = None,
-        targets: Optional[
+        targets: Optional[list[str]] = None,
         top_k: Optional[int] = None,
-    ) ->
+    ) -> list[FillMaskOutputElement]:
         """
         Fill in a hole with a missing word (token to be precise).

@@ -1142,20 +1143,20 @@ class AsyncInferenceClient:
             model (`str`, *optional*):
                 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`
+            targets (`list[str`, *optional*):
                 When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                 vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                 resulting token will be used (with a warning, and that might be slower).
             top_k (`int`, *optional*):
                 When passed, overrides the number of predictions to return.
         Returns:
-            `
+            `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
             probability, token reference, and completed text.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1189,7 +1190,7 @@ class AsyncInferenceClient:
         model: Optional[str] = None,
         function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
         top_k: Optional[int] = None,
-    ) ->
+    ) -> list[ImageClassificationOutputElement]:
         """
         Perform image classification on the given image using the specified model.

@@ -1204,12 +1205,12 @@ class AsyncInferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
         Returns:
-            `
+            `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1242,7 +1243,7 @@ class AsyncInferenceClient:
         overlap_mask_area_threshold: Optional[float] = None,
         subtask: Optional["ImageSegmentationSubtask"] = None,
         threshold: Optional[float] = None,
-    ) ->
+    ) -> list[ImageSegmentationOutputElement]:
         """
         Perform image segmentation on the given image using the specified model.

@@ -1267,12 +1268,12 @@ class AsyncInferenceClient:
             threshold (`float`, *optional*):
                 Probability threshold to filter out predicted masks.
         Returns:
-            `
+            `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1350,7 +1351,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1482,7 +1483,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1506,12 +1507,12 @@ class AsyncInferenceClient:
             api_key=self.token,
         )
         response = await self._inner_post(request_parameters)
-        output_list:
+        output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
         return output_list[0]

     async def object_detection(
         self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-    ) ->
+    ) -> list[ObjectDetectionOutputElement]:
         """
         Perform object detection on the given image using the specified model.

@@ -1530,12 +1531,12 @@ class AsyncInferenceClient:
             threshold (`float`, *optional*):
                 The probability necessary to make a prediction.
         Returns:
-            `
+            `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If the request output is not a List.
@@ -1574,7 +1575,7 @@ class AsyncInferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-    ) -> Union[QuestionAnsweringOutputElement,
+    ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
         """
         Retrieve the answer to a question from a given text.

@@ -1606,13 +1607,13 @@ class AsyncInferenceClient:
                 topk answers if there are not enough options available within the context.

         Returns:
-            Union[`QuestionAnsweringOutputElement`,
+            Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                 When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                 When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1647,15 +1648,15 @@ class AsyncInferenceClient:
         return output

     async def sentence_similarity(
-        self, sentence: str, other_sentences:
-    ) ->
+        self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+    ) -> list[float]:
         """
         Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.

         Args:
             sentence (`str`):
                 The main sentence to compare to others.
-            other_sentences (`
+            other_sentences (`list[str]`):
                 The list of sentences to compare to.
             model (`str`, *optional*):
                 The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1663,12 +1664,12 @@ class AsyncInferenceClient:
                 Defaults to None.

         Returns:
-            `
+            `list[float]`: The embedding representing the input text.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1706,7 +1707,7 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
-        generate_parameters: Optional[
+        generate_parameters: Optional[dict[str, Any]] = None,
         truncation: Optional["SummarizationTruncationStrategy"] = None,
     ) -> SummarizationOutput:
         """
@@ -1720,7 +1721,7 @@ class AsyncInferenceClient:
                 Inference Endpoint. If not provided, the default recommended model for summarization will be used.
             clean_up_tokenization_spaces (`bool`, *optional*):
                 Whether to clean up the potential extra spaces in the text output.
-            generate_parameters (`
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
             truncation (`"SummarizationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
@@ -1730,7 +1731,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1761,7 +1762,7 @@ class AsyncInferenceClient:

     async def table_question_answering(
         self,
-        table:
+        table: dict[str, Any],
         query: str,
         *,
         model: Optional[str] = None,
@@ -1796,7 +1797,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1822,12 +1823,12 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)

-    async def tabular_classification(self, table:
+    async def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
         """
         Classifying a target category (a group) based on a set of attributes.

         Args:
-            table (`
+            table (`dict[str, Any]`):
                 Set of attributes to classify.
             model (`str`, *optional*):
                 The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1840,7 +1841,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1878,12 +1879,12 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return _bytes_to_list(response)

-    async def tabular_regression(self, table:
+    async def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
         """
         Predicting a numerical target value given a set of attributes/features in a table.

         Args:
-            table (`
+            table (`dict[str, Any]`):
                 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
             model (`str`, *optional*):
                 The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1896,7 +1897,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1936,7 +1937,7 @@ class AsyncInferenceClient:
|
|
|
1936
1937
|
model: Optional[str] = None,
|
|
1937
1938
|
top_k: Optional[int] = None,
|
|
1938
1939
|
function_to_apply: Optional["TextClassificationOutputTransform"] = None,
|
|
1939
|
-
) ->
|
|
1940
|
+
) -> list[TextClassificationOutputElement]:
|
|
1940
1941
|
"""
|
|
1941
1942
|
Perform text classification (e.g. sentiment-analysis) on the given text.
|
|
1942
1943
|
|
|
@@ -1953,12 +1954,12 @@ class AsyncInferenceClient:
|
|
|
1953
1954
|
The function to apply to the model outputs in order to retrieve the scores.
|
|
1954
1955
|
|
|
1955
1956
|
Returns:
|
|
1956
|
-
`
|
|
1957
|
+
`list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
|
|
1957
1958
|
|
|
1958
1959
|
Raises:
|
|
1959
1960
|
[`InferenceTimeoutError`]:
|
|
1960
1961
|
If the model is unavailable or the request times out.
|
|
1961
|
-
`
|
|
1962
|
+
[`HfHubHTTPError`]:
|
|
1962
1963
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
1963
1964
|
|
|
1964
1965
|
Example:
|
|
@@ -2007,8 +2008,8 @@ class AsyncInferenceClient:
|
|
|
2007
2008
|
repetition_penalty: Optional[float] = None,
|
|
2008
2009
|
return_full_text: Optional[bool] = None,
|
|
2009
2010
|
seed: Optional[int] = None,
|
|
2010
|
-
stop: Optional[
|
|
2011
|
-
stop_sequences: Optional[
|
|
2011
|
+
stop: Optional[list[str]] = None,
|
|
2012
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2012
2013
|
temperature: Optional[float] = None,
|
|
2013
2014
|
top_k: Optional[int] = None,
|
|
2014
2015
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2037,8 +2038,8 @@ class AsyncInferenceClient:
|
|
|
2037
2038
|
repetition_penalty: Optional[float] = None,
|
|
2038
2039
|
return_full_text: Optional[bool] = None,
|
|
2039
2040
|
seed: Optional[int] = None,
|
|
2040
|
-
stop: Optional[
|
|
2041
|
-
stop_sequences: Optional[
|
|
2041
|
+
stop: Optional[list[str]] = None,
|
|
2042
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2042
2043
|
temperature: Optional[float] = None,
|
|
2043
2044
|
top_k: Optional[int] = None,
|
|
2044
2045
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2067,8 +2068,8 @@ class AsyncInferenceClient:
|
|
|
2067
2068
|
repetition_penalty: Optional[float] = None,
|
|
2068
2069
|
return_full_text: Optional[bool] = None, # Manual default value
|
|
2069
2070
|
seed: Optional[int] = None,
|
|
2070
|
-
stop: Optional[
|
|
2071
|
-
stop_sequences: Optional[
|
|
2071
|
+
stop: Optional[list[str]] = None,
|
|
2072
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2072
2073
|
temperature: Optional[float] = None,
|
|
2073
2074
|
top_k: Optional[int] = None,
|
|
2074
2075
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2097,8 +2098,8 @@ class AsyncInferenceClient:
|
|
|
2097
2098
|
repetition_penalty: Optional[float] = None,
|
|
2098
2099
|
return_full_text: Optional[bool] = None,
|
|
2099
2100
|
seed: Optional[int] = None,
|
|
2100
|
-
stop: Optional[
|
|
2101
|
-
stop_sequences: Optional[
|
|
2101
|
+
stop: Optional[list[str]] = None,
|
|
2102
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2102
2103
|
temperature: Optional[float] = None,
|
|
2103
2104
|
top_k: Optional[int] = None,
|
|
2104
2105
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2127,8 +2128,8 @@ class AsyncInferenceClient:
|
|
|
2127
2128
|
repetition_penalty: Optional[float] = None,
|
|
2128
2129
|
return_full_text: Optional[bool] = None,
|
|
2129
2130
|
seed: Optional[int] = None,
|
|
2130
|
-
stop: Optional[
|
|
2131
|
-
stop_sequences: Optional[
|
|
2131
|
+
stop: Optional[list[str]] = None,
|
|
2132
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2132
2133
|
temperature: Optional[float] = None,
|
|
2133
2134
|
top_k: Optional[int] = None,
|
|
2134
2135
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2156,8 +2157,8 @@ class AsyncInferenceClient:
|
|
|
2156
2157
|
repetition_penalty: Optional[float] = None,
|
|
2157
2158
|
return_full_text: Optional[bool] = None,
|
|
2158
2159
|
seed: Optional[int] = None,
|
|
2159
|
-
stop: Optional[
|
|
2160
|
-
stop_sequences: Optional[
|
|
2160
|
+
stop: Optional[list[str]] = None,
|
|
2161
|
+
stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
|
|
2161
2162
|
temperature: Optional[float] = None,
|
|
2162
2163
|
top_k: Optional[int] = None,
|
|
2163
2164
|
top_n_tokens: Optional[int] = None,
|
|
@@ -2213,9 +2214,9 @@ class AsyncInferenceClient:
|
|
|
2213
2214
|
Whether to prepend the prompt to the generated text
|
|
2214
2215
|
seed (`int`, *optional*):
|
|
2215
2216
|
Random sampling seed
|
|
2216
|
-
stop (`
|
|
2217
|
+
stop (`list[str]`, *optional*):
|
|
2217
2218
|
Stop generating tokens if a member of `stop` is generated.
|
|
2218
|
-
stop_sequences (`
|
|
2219
|
+
stop_sequences (`list[str]`, *optional*):
|
|
2219
2220
|
Deprecated argument. Use `stop` instead.
|
|
2220
2221
|
temperature (`float`, *optional*):
|
|
2221
2222
|
The value used to module the logits distribution.
|
|
@@ -2236,10 +2237,10 @@ class AsyncInferenceClient:
|
|
|
2236
2237
|
Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
|
|
2237
2238
|
|
|
2238
2239
|
Returns:
|
|
2239
|
-
`Union[str, TextGenerationOutput,
|
|
2240
|
+
`Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
|
|
2240
2241
|
Generated text returned from the server:
|
|
2241
2242
|
- if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
|
|
2242
|
-
- if `stream=True` and `details=False`, the generated text is returned token by token as a `
|
|
2243
|
+
- if `stream=True` and `details=False`, the generated text is returned token by token as a `AsyncIterable[str]`
|
|
2243
2244
|
- if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
|
|
2244
2245
|
- if `details=True` and `stream=True`, the generated text is returned token by token as a iterable of [`~huggingface_hub.TextGenerationStreamOutput`]
|
|
2245
2246
|
|
|
@@ -2248,7 +2249,7 @@ class AsyncInferenceClient:
|
|
|
2248
2249
|
If input values are not valid. No HTTP call is made to the server.
|
|
2249
2250
|
[`InferenceTimeoutError`]:
|
|
2250
2251
|
If the model is unavailable or the request times out.
|
|
2251
|
-
`
|
|
2252
|
+
[`HfHubHTTPError`]:
|
|
2252
2253
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2253
2254
|
|
|
2254
2255
|
Example:
|
|
@@ -2438,9 +2439,9 @@ class AsyncInferenceClient:
|
|
|
2438
2439
|
# Handle errors separately for more precise error messages
|
|
2439
2440
|
try:
|
|
2440
2441
|
bytes_output = await self._inner_post(request_parameters, stream=stream or False)
|
|
2441
|
-
except
|
|
2442
|
-
match = MODEL_KWARGS_NOT_USED_REGEX.search(e
|
|
2443
|
-
if e
|
|
2442
|
+
except HfHubHTTPError as e:
|
|
2443
|
+
match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
|
|
2444
|
+
if isinstance(e, BadRequestError) and match:
|
|
2444
2445
|
unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
|
|
2445
2446
|
_set_unsupported_text_generation_kwargs(model, unused_params)
|
|
2446
2447
|
return await self.text_generation( # type: ignore
|
|
@@ -2493,7 +2494,7 @@ class AsyncInferenceClient:
|
|
|
2493
2494
|
model: Optional[str] = None,
|
|
2494
2495
|
scheduler: Optional[str] = None,
|
|
2495
2496
|
seed: Optional[int] = None,
|
|
2496
|
-
extra_body: Optional[
|
|
2497
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2497
2498
|
) -> "Image":
|
|
2498
2499
|
"""
|
|
2499
2500
|
Generate an image based on a given text using a specified model.
|
|
@@ -2531,7 +2532,7 @@ class AsyncInferenceClient:
|
|
|
2531
2532
|
Override the scheduler with a compatible one.
|
|
2532
2533
|
seed (`int`, *optional*):
|
|
2533
2534
|
Seed for the random number generator.
|
|
2534
|
-
extra_body (`
|
|
2535
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2535
2536
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2536
2537
|
for supported parameters.
|
|
2537
2538
|
|
|
@@ -2541,7 +2542,7 @@ class AsyncInferenceClient:
|
|
|
2541
2542
|
Raises:
|
|
2542
2543
|
[`InferenceTimeoutError`]:
|
|
2543
2544
|
If the model is unavailable or the request times out.
|
|
2544
|
-
`
|
|
2545
|
+
[`HfHubHTTPError`]:
|
|
2545
2546
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2546
2547
|
|
|
2547
2548
|
Example:
|
|
@@ -2631,11 +2632,11 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         guidance_scale: Optional[float] = None,
-        negative_prompt: Optional[
+        negative_prompt: Optional[list[str]] = None,
         num_frames: Optional[float] = None,
         num_inference_steps: Optional[int] = None,
         seed: Optional[int] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Generate a video based on a given text.
@@ -2654,7 +2655,7 @@ class AsyncInferenceClient:
             guidance_scale (`float`, *optional*):
                 A higher guidance scale value encourages the model to generate videos closely linked to the text
                 prompt, but values too high may cause saturation and other artifacts.
-            negative_prompt (`
+            negative_prompt (`list[str]`, *optional*):
                 One or several prompt to guide what NOT to include in video generation.
             num_frames (`float`, *optional*):
                 The num_frames parameter determines how many video frames are generated.
@@ -2663,7 +2664,7 @@ class AsyncInferenceClient:
                 expense of slower inference.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_body (`
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.

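A short sketch of the updated `text_to_video` signature, with `negative_prompt` as `list[str]` and `extra_body` as `dict[str, Any]`. The model id, prompts, and extra parameters below are placeholders:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # model id and parameter values are placeholders
    video = await client.text_to_video(
        "A red panda eating bamboo in the rain",
        model="tencent/HunyuanVideo",
        negative_prompt=["blurry", "low quality"],  # now typed as list[str]
        num_frames=49,
        seed=0,
        extra_body={"fps": 16},  # provider-specific extras go in a plain dict
    )
    with open("panda.mp4", "wb") as f:
        f.write(video)  # the method returns raw bytes


asyncio.run(main())
```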
@@ -2743,7 +2744,7 @@ class AsyncInferenceClient:
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.
@@ -2805,7 +2806,7 @@ class AsyncInferenceClient:
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-            extra_body (`
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -2814,7 +2815,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
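A hedged sketch of calling the updated `text_to_speech` signature; the model id and `extra_body` content are placeholders, and the error handling mirrors the Raises section above:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient
from huggingface_hub.errors import HfHubHTTPError


async def main() -> None:
    client = AsyncInferenceClient()  # model id and extra_body keys are placeholders
    try:
        audio = await client.text_to_speech(
            "Hello from the asynchronous inference client.",
            model="hexgrad/Kokoro-82M",
            extra_body={"voice": "af_bella"},
        )
    except HfHubHTTPError as e:  # raised for non-503 HTTP errors, per the docstring above
        raise SystemExit(f"Speech synthesis failed: {e}")
    with open("speech.flac", "wb") as f:
        f.write(audio)  # raw audio bytes


asyncio.run(main())
```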
@@ -2938,9 +2939,9 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
-        ignore_labels: Optional[
+        ignore_labels: Optional[list[str]] = None,
         stride: Optional[int] = None,
-    ) ->
+    ) -> list[TokenClassificationOutputElement]:
         """
         Perform token classification on the given text.
         Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2954,18 +2955,18 @@ class AsyncInferenceClient:
                 Defaults to None.
             aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
                 The strategy used to fuse tokens based on model predictions
-            ignore_labels (`
+            ignore_labels (`list[str`, *optional*):
                 A list of labels to ignore
             stride (`int`, *optional*):
                 The number of overlapping tokens between chunks when splitting the input text.

         Returns:
-            `
+            `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
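A minimal sketch against the updated `token_classification` annotations (`ignore_labels: list[str]`, return type `list[TokenClassificationOutputElement]`); the model id and aggregation strategy below are placeholders:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # model id is a placeholder
    entities = await client.token_classification(
        "My name is Sarah Jessica Parker but you can call me Jessica",
        model="dslim/bert-base-NER",
        aggregation_strategy="simple",
        ignore_labels=["O"],  # now typed as list[str]
    )
    for item in entities:  # list[TokenClassificationOutputElement]
        print(item.entity_group, item.word, item.score)


asyncio.run(main())
```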
@@ -3017,7 +3018,7 @@ class AsyncInferenceClient:
         tgt_lang: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         truncation: Optional["TranslationTruncationStrategy"] = None,
-        generate_parameters: Optional[
+        generate_parameters: Optional[dict[str, Any]] = None,
     ) -> TranslationOutput:
         """
         Convert text from one language to another.
@@ -3042,7 +3043,7 @@ class AsyncInferenceClient:
                 Whether to clean up the potential extra spaces in the text output.
             truncation (`"TranslationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
-            generate_parameters (`
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.

         Returns:
@@ -3051,7 +3052,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If only one of the `src_lang` and `tgt_lang` arguments are provided.
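A small sketch of `translation` with the `generate_parameters` dict annotation shown above; the model id, language codes, and generation parameters are placeholders, and `src_lang`/`tgt_lang` must be passed together (per the `ValueError` above):

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # model id and generation parameters are placeholders
    result = await client.translation(
        "My name is Wolfgang and I live in Berlin",
        model="facebook/nllb-200-distilled-600M",
        src_lang="eng_Latn",  # must be given together with tgt_lang
        tgt_lang="fra_Latn",
        generate_parameters={"max_new_tokens": 64},  # plain dict, per the updated annotation
    )
    print(result.translation_text)  # TranslationOutput


asyncio.run(main())
```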
@@ -3105,7 +3106,7 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         top_k: Optional[int] = None,
-    ) ->
+    ) -> list[VisualQuestionAnsweringOutputElement]:
         """
         Answering open-ended questions based on an image.

@@ -3122,12 +3123,12 @@ class AsyncInferenceClient:
                 The number of answers to return (will be chosen by order of likelihood). Note that we return less than
                 topk answers if there are not enough options available within the context.
         Returns:
-            `
+            `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
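A sketch of `visual_question_answering`, which now returns `list[VisualQuestionAnsweringOutputElement]`; the image URL is a placeholder and the `answer`/`score` field names are assumptions for illustration, not taken from this diff:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # model resolution left to the client; URL is a placeholder
    answers = await client.visual_question_answering(
        image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
        question="What animal is this?",
        top_k=3,
    )
    for answer in answers:  # list[VisualQuestionAnsweringOutputElement]
        print(answer.answer, answer.score)  # assumed field names


asyncio.run(main())
```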
@@ -3161,21 +3162,21 @@ class AsyncInferenceClient:
     async def zero_shot_classification(
         self,
         text: str,
-        candidate_labels:
+        candidate_labels: list[str],
         *,
         multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
-    ) ->
+    ) -> list[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.

         Args:
             text (`str`):
                 The input text to classify.
-            candidate_labels (`
+            candidate_labels (`list[str]`):
                 The set of possible class labels to classify the text into.
-            labels (`
+            labels (`list[str]`, *optional*):
                 (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
             multi_label (`bool`, *optional*):
                 Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3190,12 +3191,12 @@ class AsyncInferenceClient:


         Returns:
-            `
+            `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example with `multi_label=False`:
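A minimal sketch of `zero_shot_classification` with `candidate_labels` typed as `list[str]`; the model id, text, and labels below are placeholders:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # model id is a placeholder
    results = await client.zero_shot_classification(
        "The new GPU drastically cuts training time for our vision models.",
        candidate_labels=["technology", "sports", "politics"],  # list[str]
        multi_label=False,
        model="facebook/bart-large-mnli",
    )
    for item in results:  # list[ZeroShotClassificationOutputElement]
        print(item.label, item.score)


asyncio.run(main())
```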
@@ -3269,22 +3270,22 @@ class AsyncInferenceClient:
     async def zero_shot_image_classification(
         self,
         image: ContentT,
-        candidate_labels:
+        candidate_labels: list[str],
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
         # deprecated argument
-        labels:
-    ) ->
+        labels: list[str] = None, # type: ignore
+    ) -> list[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.

         Args:
             image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
                 The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
-            candidate_labels (`
+            candidate_labels (`list[str]`):
                 The candidate labels for this image
-            labels (`
+            labels (`list[str]`, *optional*):
                 (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3294,12 +3295,12 @@ class AsyncInferenceClient:
                 replacing the placeholder with the candidate labels.

         Returns:
-            `
+            `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
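A sketch of `zero_shot_image_classification` with `candidate_labels: list[str]`; the image URL, labels, and model id are placeholders:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # model id and image URL are placeholders
    results = await client.zero_shot_image_classification(
        "https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg",
        candidate_labels=["dog", "cat", "horse"],  # passed as list[str]
        model="openai/clip-vit-base-patch32",
    )
    for item in results:  # list[ZeroShotImageClassificationOutputElement]
        print(item.label, item.score)


asyncio.run(main())
```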
@@ -3334,48 +3335,7 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    def
-        aiohttp = _import_aiohttp()
-        client_headers = self.headers.copy()
-        if headers is not None:
-            client_headers.update(headers)
-
-        # Return a new aiohttp ClientSession with correct settings.
-        session = aiohttp.ClientSession(
-            headers=client_headers,
-            cookies=self.cookies,
-            timeout=aiohttp.ClientTimeout(self.timeout),
-            trust_env=self.trust_env,
-        )
-
-        # Keep track of sessions to close them later
-        self._sessions[session] = set()
-
-        # Override the `._request` method to register responses to be closed
-        session._wrapped_request = session._request
-
-        async def _request(method, url, **kwargs):
-            response = await session._wrapped_request(method, url, **kwargs)
-            self._sessions[session].add(response)
-            return response
-
-        session._request = _request
-
-        # Override the 'close' method to
-        # 1. close ongoing responses
-        # 2. deregister the session when closed
-        session._close = session.close
-
-        async def close_session():
-            for response in self._sessions[session]:
-                response.close()
-            await session._close()
-            self._sessions.pop(session, None)
-
-        session.close = close_session
-        return session
-
-    async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+    async def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
         """
         Get information about the deployed endpoint.

@@ -3388,7 +3348,7 @@ class AsyncInferenceClient:
                 Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

         Returns:
-            `
+            `dict[str, Any]`: Information about the endpoint.

         Example:
         ```py
@@ -3430,10 +3390,10 @@ class AsyncInferenceClient:
         else:
             url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

-
-
-
-
+        client = await self._get_async_client()
+        response = await client.get(url, headers=build_hf_headers(token=self.token))
+        hf_raise_for_status(response)
+        return response.json()

     async def health_check(self, model: Optional[str] = None) -> bool:
         """
@@ -3467,9 +3427,9 @@ class AsyncInferenceClient:
             raise ValueError("Model must be an Inference Endpoint URL.")
         url = model.rstrip("/") + "/health"

-
-
-
+        client = await self._get_async_client()
+        response = await client.get(url, headers=build_hf_headers(token=self.token))
+        return response.status_code == 200

     @property
     def chat(self) -> "ProxyClientChat":
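A combined sketch of `get_endpoint_info` (now returning a plain `dict[str, Any]`) and `health_check`; the model id and endpoint URL below are placeholders. Per the hunks above, `get_endpoint_info` also accepts a plain model id (routed to `.../models/{model}/info`), while `health_check` requires an Inference Endpoint URL:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # model id and endpoint URL are placeholders
    info = await client.get_endpoint_info(model="meta-llama/Llama-3.1-8B-Instruct")
    print(info)  # plain dict[str, Any]

    # health_check expects an Inference Endpoint URL, not a model id
    healthy = await client.health_check(model="https://my-endpoint.endpoints.huggingface.cloud")
    print("healthy:", healthy)


asyncio.run(main())
```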