huggingface-hub 0.35.0rc0__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic (see the registry page for details).
- huggingface_hub/__init__.py +46 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +176 -20
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +10 -14
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +13 -14
- huggingface_hub/_upload_large_folder.py +120 -13
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/_cli_utils.py +2 -2
- huggingface_hub/cli/auth.py +8 -6
- huggingface_hub/cli/cache.py +18 -20
- huggingface_hub/cli/download.py +4 -4
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +599 -22
- huggingface_hub/cli/lfs.py +4 -4
- huggingface_hub/cli/repo.py +11 -7
- huggingface_hub/cli/repo_files.py +2 -2
- huggingface_hub/cli/upload.py +4 -4
- huggingface_hub/cli/upload_large_folder.py +3 -3
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +13 -13
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -19
- huggingface_hub/errors.py +42 -29
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +162 -259
- huggingface_hub/hf_api.py +841 -616
- huggingface_hub/hf_file_system.py +98 -62
- huggingface_hub/hub_mixin.py +37 -57
- huggingface_hub/inference/_client.py +257 -325
- huggingface_hub/inference/_common.py +110 -124
- huggingface_hub/inference/_generated/_async_client.py +307 -432
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +2 -3
- huggingface_hub/inference/_mcp/mcp_client.py +58 -30
- huggingface_hub/inference/_mcp/types.py +10 -7
- huggingface_hub/inference/_mcp/utils.py +11 -7
- huggingface_hub/inference/_providers/__init__.py +4 -2
- huggingface_hub/inference/_providers/_common.py +49 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +52 -21
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +28 -20
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +20 -31
- huggingface_hub/repocard.py +18 -18
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +25 -21
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +4 -4
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +366 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +16 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +32 -11
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
- huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_client.py

@@ -34,18 +34,17 @@
 # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
 import base64
 import logging
+import os
 import re
 import warnings
-from
-
-from requests import HTTPError
+from contextlib import ExitStack
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload

 from huggingface_hub import constants
-from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
+from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
-    ModelStatus,
     RequestParameters,
     _b64_encode,
     _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
     _bytes_to_list,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _open_as_binary,
     _set_unsupported_text_generation_kwargs,
     _stream_chat_completion_response,
     _stream_text_generation_response,
@@ -81,6 +79,7 @@ from huggingface_hub.inference._generated.types import (
     ImageSegmentationSubtask,
     ImageToImageTargetSize,
     ImageToTextOutput,
+    ImageToVideoTargetSize,
     ObjectDetectionOutputElement,
     Padding,
     QuestionAnsweringOutputElement,
@@ -102,9 +101,13 @@ from huggingface_hub.inference._generated.types import (
     ZeroShotImageClassificationOutputElement,
 )
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
-from huggingface_hub.utils import
+from huggingface_hub.utils import (
+    build_hf_headers,
+    get_session,
+    hf_raise_for_status,
+    validate_hf_hub_args,
+)
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method


 if TYPE_CHECKING:
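The import changes above summarize the client's HTTP-stack migration: `requests.HTTPError` is gone and failures are reported through `huggingface_hub.errors.HfHubHTTPError`, which is what callers should now catch. A minimal sketch of the corresponding change on the caller side (the task call and input string are illustrative):

```py
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError  # replaces catching requests.HTTPError

client = InferenceClient()
try:
    client.text_classification("I love this library!")
except HfHubHTTPError as err:
    # Non-503 HTTP failures from the inference call surface here.
    print(f"Inference request failed: {err}")
```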
@@ -141,16 +144,14 @@ class InferenceClient:
             arguments are mutually exclusive and have the exact same behavior.
         timeout (`float`, `optional`):
             The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-        headers (`
+        headers (`dict[str, str]`, `optional`):
             Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
             Values in this dictionary will override the default values.
         bill_to (`str`, `optional`):
             The billing account to use for the requests. By default the requests are billed on the user's account.
             Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-        cookies (`
+        cookies (`dict[str, str]`, `optional`):
             Additional cookies to send to the server.
-        proxies (`Any`, `optional`):
-            Proxies to use for the request.
         base_url (`str`, `optional`):
             Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -159,6 +160,7 @@ class InferenceClient:
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
     """

+    @validate_hf_hub_args
     def __init__(
         self,
         model: Optional[str] = None,
@@ -166,9 +168,8 @@ class InferenceClient:
         provider: Optional[PROVIDER_OR_POLICY_T] = None,
         token: Optional[str] = None,
         timeout: Optional[float] = None,
-        headers: Optional[
-        cookies: Optional[
-        proxies: Optional[Any] = None,
+        headers: Optional[dict[str, str]] = None,
+        cookies: Optional[dict[str, str]] = None,
         bill_to: Optional[str] = None,
         # OpenAI compatibility
         base_url: Optional[str] = None,
@@ -230,11 +231,21 @@ class InferenceClient:

         self.cookies = cookies
         self.timeout = timeout
-
+
+        self.exit_stack = ExitStack()

     def __repr__(self):
         return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.exit_stack.close()
+
+    def close(self):
+        self.exit_stack.close()
+
     @overload
     def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
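With the `ExitStack` wiring above, `InferenceClient` can now be closed explicitly or used as a context manager so that resources it acquires (such as open streamed responses) are released. A minimal usage sketch (the input string is illustrative):

```py
from huggingface_hub import InferenceClient

# Preferred: the context manager closes the client's ExitStack on exit.
with InferenceClient() as client:
    labels = client.text_classification("Great release!")
print(labels)

# Equivalent, without the context manager:
client = InferenceClient()
try:
    labels = client.text_classification("Great release!")
finally:
    client.close()
```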
@@ -243,45 +254,46 @@ class InferenceClient:
     @overload
     def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-    ) -> Iterable[
+    ) -> Iterable[str]: ...

     @overload
     def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, Iterable[
+    ) -> Union[bytes, Iterable[str]]: ...

     def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, Iterable[
+    ) -> Union[bytes, Iterable[str]]:
         """Make a request to the inference server."""
         # TODO: this should be handled in provider helpers directly
         if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
             request_parameters.headers["Accept"] = "image/png"

-
-
-
+        try:
+            response = self.exit_stack.enter_context(
+                get_session().stream(
+                    "POST",
                     request_parameters.url,
                     json=request_parameters.json,
-                    data
+                    content=request_parameters.data,
                     headers=request_parameters.headers,
                     cookies=self.cookies,
                     timeout=self.timeout,
-                    stream=stream,
-                    proxies=self.proxies,
                 )
-
-            # Convert any `TimeoutError` to a `InferenceTimeoutError`
-            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
-
-        try:
+            )
             hf_raise_for_status(response)
-
-
+            if stream:
+                return response.iter_lines()
+            else:
+                return response.read()
+        except TimeoutError as error:
+            # Convert any `TimeoutError` to a `InferenceTimeoutError`
+            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
+        except HfHubHTTPError as error:
             if error.response.status_code == 422 and request_parameters.task != "unknown":
                 msg = str(error.args[0])
                 if len(error.response.text) > 0:
-                    msg += f"
+                    msg += f"{os.linesep}{error.response.text}{os.linesep}"
                 error.args = (msg,) + error.args[1:]
                 raise

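The rewritten `_inner_post` above enters the session's `stream()` call through the client's `ExitStack` and consumes the output via `iter_lines()` (streaming) or `read()` (non-streaming). The standalone sketch below illustrates that pattern with httpx directly; it is an assumption that the new `get_session()` returns an httpx-compatible client, and the URL and payload are placeholders.

```py
# Illustrative sketch of the streaming pattern used by the new _inner_post.
# Assumption: get_session() is httpx-backed; URL and payload are placeholders.
from contextlib import ExitStack

import httpx

exit_stack = ExitStack()
session = exit_stack.enter_context(httpx.Client())
response = exit_stack.enter_context(
    session.stream("POST", "https://example.invalid/infer", json={"inputs": "hello"})
)
response.raise_for_status()
for line in response.iter_lines():  # consume the streamed response line by line
    print(line)
exit_stack.close()  # closes the response and the client
```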
@@ -292,7 +304,7 @@ class InferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-    ) ->
+    ) -> list[AudioClassificationOutputElement]:
         """
         Perform audio classification on the provided audio content.

@@ -310,12 +322,12 @@ class InferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.

         Returns:
-            `
+            `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -347,7 +359,7 @@ class InferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-    ) ->
+    ) -> list[AudioToAudioOutputElement]:
         """
         Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).

@@ -361,12 +373,12 @@ class InferenceClient:
                 audio_to_audio will be used.

         Returns:
-            `
+            `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.

         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -399,7 +411,7 @@ class InferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -410,7 +422,7 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-            extra_body (`
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -419,7 +431,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -445,105 +457,105 @@ class InferenceClient:
     @overload
     def chat_completion(  # type: ignore
         self,
-        messages:
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[False] = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> ChatCompletionOutput: ...

     @overload
     def chat_completion(  # type: ignore
         self,
-        messages:
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[True] = True,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> Iterable[ChatCompletionStreamOutput]: ...

     @overload
     def chat_completion(
         self,
-        messages:
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...

     def chat_completion(
         self,
-        messages:
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         # Parameters from ChatCompletionInput (handled manually)
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.
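The four `chat_completion` signatures above only modernize the annotations (built-in `list`/`dict` generics, `extra_body: Optional[dict]`); the call pattern itself is unchanged. A short usage sketch exercising the list-typed parameters (the model id and stop sequence are illustrative):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
output = client.chat_completion(
    messages=[{"role": "user", "content": "Name three French cheeses."}],
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model id
    max_tokens=128,
    stop=["\n\n"],  # plain list[str], matching the new annotations
)
print(output.choices[0].message.content)
```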
@@ -573,7 +585,7 @@ class InferenceClient:
             frequency_penalty (`float`, *optional*):
                 Penalizes new tokens based on their existing frequency
                 in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-            logit_bias (`
+            logit_bias (`list[float]`, *optional*):
                 Adjusts the likelihood of specific tokens appearing in the generated output.
             logprobs (`bool`, *optional*):
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -589,7 +601,7 @@ class InferenceClient:
                 Grammar constraints. Can be either a JSONSchema or a regex.
             seed (Optional[`int`], *optional*):
                 Seed for reproducible control flow. Defaults to None.
-            stop (`
+            stop (`list[str]`, *optional*):
                 Up to four strings which trigger the end of the response.
                 Defaults to None.
             stream (`bool`, *optional*):
@@ -613,7 +625,7 @@ class InferenceClient:
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-            extra_body (`
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -625,7 +637,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -939,8 +951,8 @@ class InferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-        word_boxes: Optional[
-    ) ->
+        word_boxes: Optional[list[Union[list[float], str]]] = None,
+    ) -> list[DocumentQuestionAnsweringOutputElement]:
         """
         Answer questions on document images.

@@ -970,16 +982,16 @@ class InferenceClient:
             top_k (`int`, *optional*):
                 The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                 answers if there are not enough options available within the context.
-            word_boxes (`
+            word_boxes (`list[Union[list[float], str`, *optional*):
                 A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                 step and use the provided bounding boxes instead.
         Returns:
-            `
+            `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.


@@ -993,7 +1005,7 @@ class InferenceClient:
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-        inputs:
+        inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
         request_parameters = provider_helper.prepare_request(
             inputs=inputs,
             parameters={
@@ -1054,7 +1066,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1091,9 +1103,9 @@ class InferenceClient:
         text: str,
         *,
         model: Optional[str] = None,
-        targets: Optional[
+        targets: Optional[list[str]] = None,
         top_k: Optional[int] = None,
-    ) ->
+    ) -> list[FillMaskOutputElement]:
         """
         Fill in a hole with a missing word (token to be precise).

@@ -1103,20 +1115,20 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`
+            targets (`list[str`, *optional*):
                 When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                 vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                 resulting token will be used (with a warning, and that might be slower).
             top_k (`int`, *optional*):
                 When passed, overrides the number of predictions to return.
         Returns:
-            `
+            `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
             probability, token reference, and completed text.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1149,7 +1161,7 @@ class InferenceClient:
         model: Optional[str] = None,
         function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
         top_k: Optional[int] = None,
-    ) ->
+    ) -> list[ImageClassificationOutputElement]:
         """
         Perform image classification on the given image using the specified model.

@@ -1164,12 +1176,12 @@ class InferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
         Returns:
-            `
+            `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1201,7 +1213,7 @@ class InferenceClient:
         overlap_mask_area_threshold: Optional[float] = None,
         subtask: Optional["ImageSegmentationSubtask"] = None,
         threshold: Optional[float] = None,
-    ) ->
+    ) -> list[ImageSegmentationOutputElement]:
         """
         Perform image segmentation on the given image using the specified model.

@@ -1226,12 +1238,12 @@ class InferenceClient:
             threshold (`float`, *optional*):
                 Probability threshold to filter out predicted masks.
         Returns:
-            `
+            `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1308,7 +1320,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1339,6 +1351,85 @@ class InferenceClient:
         response = provider_helper.get_response(response, request_parameters)
         return _bytes_to_image(response)

+    def image_to_video(
+        self,
+        image: ContentT,
+        *,
+        model: Optional[str] = None,
+        prompt: Optional[str] = None,
+        negative_prompt: Optional[str] = None,
+        num_frames: Optional[float] = None,
+        num_inference_steps: Optional[int] = None,
+        guidance_scale: Optional[float] = None,
+        seed: Optional[int] = None,
+        target_size: Optional[ImageToVideoTargetSize] = None,
+        **kwargs,
+    ) -> bytes:
+        """
+        Generate a video from an input image.
+
+        Args:
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
+            model (`str`, *optional*):
+                The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+                Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+            prompt (`str`, *optional*):
+                The text prompt to guide the video generation.
+            negative_prompt (`str`, *optional*):
+                One prompt to guide what NOT to include in video generation.
+            num_frames (`float`, *optional*):
+                The num_frames parameter determines how many video frames are generated.
+            num_inference_steps (`int`, *optional*):
+                For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+                quality image at the expense of slower inference.
+            guidance_scale (`float`, *optional*):
+                For diffusion models. A higher guidance scale value encourages the model to generate videos closely
+                linked to the text prompt at the expense of lower image quality.
+            seed (`int`, *optional*):
+                The seed to use for the video generation.
+            target_size (`ImageToVideoTargetSize`, *optional*):
+                The size in pixel of the output video frames.
+            num_inference_steps (`int`, *optional*):
+                The number of denoising steps. More denoising steps usually lead to a higher quality video at the
+                expense of slower inference.
+            seed (`int`, *optional*):
+                Seed for the random number generator.
+
+        Returns:
+            `bytes`: The generated video.
+
+        Examples:
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient()
+        >>> video = client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
+        >>> with open("tiger.mp4", "wb") as f:
+        ...     f.write(video)
+        ```
+        """
+        model_id = model or self.model
+        provider_helper = get_provider_helper(self.provider, task="image-to-video", model=model_id)
+        request_parameters = provider_helper.prepare_request(
+            inputs=image,
+            parameters={
+                "prompt": prompt,
+                "negative_prompt": negative_prompt,
+                "num_frames": num_frames,
+                "num_inference_steps": num_inference_steps,
+                "guidance_scale": guidance_scale,
+                "seed": seed,
+                "target_size": target_size,
+                **kwargs,
+            },
+            headers=self.headers,
+            model=model_id,
+            api_key=self.token,
+        )
+        response = self._inner_post(request_parameters)
+        response = provider_helper.get_response(response, request_parameters)
+        return response
+
     def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
         """
         Takes an input image and return text.
@@ -1359,7 +1450,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1382,12 +1473,12 @@ class InferenceClient:
             api_key=self.token,
         )
         response = self._inner_post(request_parameters)
-
-        return
+        output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+        return output_list[0]

     def object_detection(
         self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-    ) ->
+    ) -> list[ObjectDetectionOutputElement]:
         """
         Perform object detection on the given image using the specified model.

@@ -1406,12 +1497,12 @@ class InferenceClient:
             threshold (`float`, *optional*):
                 The probability necessary to make a prediction.
         Returns:
-            `
+            `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If the request output is not a List.
@@ -1449,7 +1540,7 @@ class InferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-    ) -> Union[QuestionAnsweringOutputElement,
+    ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
         """
         Retrieve the answer to a question from a given text.

@@ -1481,13 +1572,13 @@ class InferenceClient:
                 topk answers if there are not enough options available within the context.

         Returns:
-            Union[`QuestionAnsweringOutputElement`,
+            Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                 When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                 When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1521,15 +1612,15 @@ class InferenceClient:
         return output

     def sentence_similarity(
-        self, sentence: str, other_sentences:
-    ) ->
+        self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+    ) -> list[float]:
         """
         Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.

         Args:
             sentence (`str`):
                 The main sentence to compare to others.
-            other_sentences (`
+            other_sentences (`list[str]`):
                 The list of sentences to compare to.
             model (`str`, *optional*):
                 The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1537,12 +1628,12 @@ class InferenceClient:
                 Defaults to None.

         Returns:
-            `
+            `list[float]`: The embedding representing the input text.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1579,7 +1670,7 @@ class InferenceClient:
         *,
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
-        generate_parameters: Optional[
+        generate_parameters: Optional[dict[str, Any]] = None,
         truncation: Optional["SummarizationTruncationStrategy"] = None,
     ) -> SummarizationOutput:
         """
@@ -1593,7 +1684,7 @@ class InferenceClient:
                 Inference Endpoint. If not provided, the default recommended model for summarization will be used.
             clean_up_tokenization_spaces (`bool`, *optional*):
                 Whether to clean up the potential extra spaces in the text output.
-            generate_parameters (`
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
             truncation (`"SummarizationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
@@ -1603,7 +1694,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1633,7 +1724,7 @@ class InferenceClient:

     def table_question_answering(
         self,
-        table:
+        table: dict[str, Any],
         query: str,
         *,
         model: Optional[str] = None,
@@ -1668,7 +1759,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1693,12 +1784,12 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)

-    def tabular_classification(self, table:
+    def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
         """
         Classifying a target category (a group) based on a set of attributes.

         Args:
-            table (`
+            table (`dict[str, Any]`):
                 Set of attributes to classify.
             model (`str`, *optional*):
                 The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1711,7 +1802,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1748,12 +1839,12 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return _bytes_to_list(response)

-    def tabular_regression(self, table:
+    def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
         """
         Predicting a numerical target value given a set of attributes/features in a table.

         Args:
-            table (`
+            table (`dict[str, Any]`):
                 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
             model (`str`, *optional*):
                 The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1766,7 +1857,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1805,7 +1896,7 @@ class InferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-    ) ->
+    ) -> list[TextClassificationOutputElement]:
         """
         Perform text classification (e.g. sentiment-analysis) on the given text.

@@ -1822,12 +1913,12 @@ class InferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.

         Returns:
-            `
+            `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
@@ -1875,8 +1966,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1905,8 +1996,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1935,8 +2026,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,  # Manual default value
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1965,8 +2056,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1995,8 +2086,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2024,8 +2115,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[
-        stop_sequences: Optional[
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2081,9 +2172,9 @@ class InferenceClient:
|
|
|
2081
2172
|
Whether to prepend the prompt to the generated text
|
|
2082
2173
|
seed (`int`, *optional*):
|
|
2083
2174
|
Random sampling seed
|
|
2084
|
-
stop (`
|
|
2175
|
+
stop (`list[str]`, *optional*):
|
|
2085
2176
|
Stop generating tokens if a member of `stop` is generated.
|
|
2086
|
-
stop_sequences (`
|
|
2177
|
+
stop_sequences (`list[str]`, *optional*):
|
|
2087
2178
|
Deprecated argument. Use `stop` instead.
|
|
2088
2179
|
temperature (`float`, *optional*):
|
|
2089
2180
|
The value used to module the logits distribution.
|
|
@@ -2116,7 +2207,7 @@ class InferenceClient:
|
|
|
2116
2207
|
If input values are not valid. No HTTP call is made to the server.
|
|
2117
2208
|
[`InferenceTimeoutError`]:
|
|
2118
2209
|
If the model is unavailable or the request times out.
|
|
2119
|
-
`
|
|
2210
|
+
[`HfHubHTTPError`]:
|
|
2120
2211
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2121
2212
|
|
|
2122
2213
|
Example:
|
|
@@ -2305,7 +2396,7 @@ class InferenceClient:
|
|
|
2305
2396
|
# Handle errors separately for more precise error messages
|
|
2306
2397
|
try:
|
|
2307
2398
|
bytes_output = self._inner_post(request_parameters, stream=stream or False)
|
|
2308
|
-
except
|
|
2399
|
+
except HfHubHTTPError as e:
|
|
2309
2400
|
match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
|
|
2310
2401
|
if isinstance(e, BadRequestError) and match:
|
|
2311
2402
|
unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
|
|
@@ -2360,7 +2451,7 @@ class InferenceClient:
|
|
|
2360
2451
|
model: Optional[str] = None,
|
|
2361
2452
|
scheduler: Optional[str] = None,
|
|
2362
2453
|
seed: Optional[int] = None,
|
|
2363
|
-
extra_body: Optional[
|
|
2454
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2364
2455
|
) -> "Image":
|
|
2365
2456
|
"""
|
|
2366
2457
|
Generate an image based on a given text using a specified model.
|
|
@@ -2398,7 +2489,7 @@ class InferenceClient:
|
|
|
2398
2489
|
Override the scheduler with a compatible one.
|
|
2399
2490
|
seed (`int`, *optional*):
|
|
2400
2491
|
Seed for the random number generator.
|
|
2401
|
-
extra_body (`
|
|
2492
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2402
2493
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2403
2494
|
for supported parameters.
|
|
2404
2495
|
|
|
@@ -2408,7 +2499,7 @@ class InferenceClient:
|
|
|
2408
2499
|
Raises:
|
|
2409
2500
|
[`InferenceTimeoutError`]:
|
|
2410
2501
|
If the model is unavailable or the request times out.
|
|
2411
|
-
`
|
|
2502
|
+
[`HfHubHTTPError`]:
|
|
2412
2503
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2413
2504
|
|
|
2414
2505
|
Example:
|
|
@@ -2497,11 +2588,11 @@ class InferenceClient:
|
|
|
2497
2588
|
*,
|
|
2498
2589
|
model: Optional[str] = None,
|
|
2499
2590
|
guidance_scale: Optional[float] = None,
|
|
2500
|
-
negative_prompt: Optional[
|
|
2591
|
+
negative_prompt: Optional[list[str]] = None,
|
|
2501
2592
|
num_frames: Optional[float] = None,
|
|
2502
2593
|
num_inference_steps: Optional[int] = None,
|
|
2503
2594
|
seed: Optional[int] = None,
|
|
2504
|
-
extra_body: Optional[
|
|
2595
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2505
2596
|
) -> bytes:
|
|
2506
2597
|
"""
|
|
2507
2598
|
Generate a video based on a given text.
|
|
@@ -2520,7 +2611,7 @@ class InferenceClient:
|
|
|
2520
2611
|
guidance_scale (`float`, *optional*):
|
|
2521
2612
|
A higher guidance scale value encourages the model to generate videos closely linked to the text
|
|
2522
2613
|
prompt, but values too high may cause saturation and other artifacts.
|
|
2523
|
-
negative_prompt (`
|
|
2614
|
+
negative_prompt (`list[str]`, *optional*):
|
|
2524
2615
|
One or several prompt to guide what NOT to include in video generation.
|
|
2525
2616
|
num_frames (`float`, *optional*):
|
|
2526
2617
|
The num_frames parameter determines how many video frames are generated.
|
|
@@ -2529,7 +2620,7 @@ class InferenceClient:
|
|
|
2529
2620
|
expense of slower inference.
|
|
2530
2621
|
seed (`int`, *optional*):
|
|
2531
2622
|
Seed for the random number generator.
|
|
2532
|
-
extra_body (`
|
|
2623
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2533
2624
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2534
2625
|
for supported parameters.
|
|
2535
2626
|
|
|
@@ -2609,7 +2700,7 @@ class InferenceClient:
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
-        extra_body: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.

@@ -2671,7 +2762,7 @@ class InferenceClient:
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-            extra_body (`Dict[str, Any]`, *optional*):
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:

@@ -2680,7 +2771,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
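A minimal sketch of `text_to_speech()` with the dict-typed `extra_body` parameter shown above; the model ID and the `voice` key are illustrative assumptions.

```python
# Hypothetical sketch of text_to_speech() with the dict-typed extra_body parameter.
from huggingface_hub import InferenceClient

client = InferenceClient()
audio = client.text_to_speech(
    "Hello from huggingface_hub 1.0",
    model="hexgrad/Kokoro-82M",      # assumed model ID, for illustration only
    extra_body={"voice": "af_sky"},  # provider-specific parameters, dict[str, Any]
)
with open("speech.flac", "wb") as f:
    f.write(audio)
```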
@@ -2803,9 +2894,9 @@ class InferenceClient:
         *,
         model: Optional[str] = None,
         aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
-        ignore_labels: Optional[List[str]] = None,
+        ignore_labels: Optional[list[str]] = None,
         stride: Optional[int] = None,
-    ) -> List[TokenClassificationOutputElement]:
+    ) -> list[TokenClassificationOutputElement]:
         """
         Perform token classification on the given text.
         Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.

@@ -2819,18 +2910,18 @@ class InferenceClient:
                 Defaults to None.
             aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
                 The strategy used to fuse tokens based on model predictions
-            ignore_labels (`List[str]`, *optional*):
+            ignore_labels (`list[str]`, *optional*):
                 A list of labels to ignore
             stride (`int`, *optional*):
                 The number of overlapping tokens between chunks when splitting the input text.

         Returns:
-            `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+            `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
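A minimal sketch of `token_classification()` under the new `list`-based annotations above; the model ID is an illustrative assumption.

```python
# Hypothetical sketch of token_classification() with the new list-based annotations.
from huggingface_hub import InferenceClient

client = InferenceClient()
entities = client.token_classification(
    "My name is Sarah Jessica Parker but you can call me Jessica",
    model="dslim/bert-base-NER",  # assumed model ID, for illustration only
    ignore_labels=["O"],          # now typed as list[str]
)
for entity in entities:  # list[TokenClassificationOutputElement]
    print(entity.entity_group, entity.word, entity.score)
```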
@@ -2881,7 +2972,7 @@ class InferenceClient:
         tgt_lang: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         truncation: Optional["TranslationTruncationStrategy"] = None,
-        generate_parameters: Optional[Dict[str, Any]] = None,
+        generate_parameters: Optional[dict[str, Any]] = None,
     ) -> TranslationOutput:
         """
         Convert text from one language to another.

@@ -2906,7 +2997,7 @@ class InferenceClient:
                 Whether to clean up the potential extra spaces in the text output.
             truncation (`"TranslationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
-            generate_parameters (`Dict[str, Any]`, *optional*):
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.

         Returns:

@@ -2915,7 +3006,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If only one of the `src_lang` and `tgt_lang` arguments are provided.
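A minimal sketch of `translation()` with the dict-typed `generate_parameters` argument above; the model ID and generation settings are illustrative assumptions.

```python
# Hypothetical sketch of translation() with dict-typed generate_parameters.
from huggingface_hub import InferenceClient

client = InferenceClient()
result = client.translation(
    "My name is Wolfgang and I live in Berlin",
    model="facebook/nllb-200-distilled-600M",  # assumed model ID, for illustration only
    src_lang="eng_Latn",
    tgt_lang="fra_Latn",
    generate_parameters={"max_length": 64},    # dict[str, Any], forwarded to text generation
)
print(result.translation_text)
```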
@@ -2968,7 +3059,7 @@ class InferenceClient:
|
|
|
2968
3059
|
*,
|
|
2969
3060
|
model: Optional[str] = None,
|
|
2970
3061
|
top_k: Optional[int] = None,
|
|
2971
|
-
) ->
|
|
3062
|
+
) -> list[VisualQuestionAnsweringOutputElement]:
|
|
2972
3063
|
"""
|
|
2973
3064
|
Answering open-ended questions based on an image.
|
|
2974
3065
|
|
|
@@ -2985,12 +3076,12 @@ class InferenceClient:
|
|
|
2985
3076
|
The number of answers to return (will be chosen by order of likelihood). Note that we return less than
|
|
2986
3077
|
topk answers if there are not enough options available within the context.
|
|
2987
3078
|
Returns:
|
|
2988
|
-
`
|
|
3079
|
+
`list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
|
|
2989
3080
|
|
|
2990
3081
|
Raises:
|
|
2991
3082
|
`InferenceTimeoutError`:
|
|
2992
3083
|
If the model is unavailable or the request times out.
|
|
2993
|
-
`
|
|
3084
|
+
[`HfHubHTTPError`]:
|
|
2994
3085
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2995
3086
|
|
|
2996
3087
|
Example:
|
|
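A minimal sketch of `visual_question_answering()` returning the `list[VisualQuestionAnsweringOutputElement]` annotated above; the image URL and model ID are illustrative assumptions.

```python
# Hypothetical sketch of visual_question_answering() with the new list return annotation.
from huggingface_hub import InferenceClient

client = InferenceClient()
answers = client.visual_question_answering(
    image="https://example.com/tiger.jpg",    # assumed URL, for illustration only
    question="What animal is in the picture?",
    model="dandelin/vilt-b32-finetuned-vqa",  # assumed model ID, for illustration only
    top_k=3,
)
for answer in answers:  # list[VisualQuestionAnsweringOutputElement]
    print(answer.answer, answer.score)
```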
@@ -3023,21 +3114,21 @@ class InferenceClient:
     def zero_shot_classification(
         self,
         text: str,
-        candidate_labels: List[str],
+        candidate_labels: list[str],
         *,
         multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
-    ) -> List[ZeroShotClassificationOutputElement]:
+    ) -> list[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.

         Args:
             text (`str`):
                 The input text to classify.
-            candidate_labels (`List[str]`):
+            candidate_labels (`list[str]`):
                 The set of possible class labels to classify the text into.
-            labels (`List[str]`, *optional*):
+            labels (`list[str]`, *optional*):
                 (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
             multi_label (`bool`, *optional*):
                 Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of

@@ -3052,12 +3143,12 @@ class InferenceClient:


         Returns:
-            `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example with `multi_label=False`:
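A minimal sketch of `zero_shot_classification()` with `candidate_labels` typed as `list[str]` as shown above; the input text, labels, and model ID are illustrative assumptions.

```python
# Hypothetical sketch of zero_shot_classification() with candidate_labels as list[str].
from huggingface_hub import InferenceClient

client = InferenceClient()
results = client.zero_shot_classification(
    "The new phone has an amazing camera but the battery drains quickly.",
    candidate_labels=["camera", "battery", "price", "shipping"],
    multi_label=True,
    model="facebook/bart-large-mnli",  # assumed model ID, for illustration only
)
for item in results:  # list[ZeroShotClassificationOutputElement]
    print(item.label, item.score)
```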
@@ -3129,22 +3220,22 @@ class InferenceClient:
     def zero_shot_image_classification(
         self,
         image: ContentT,
-        candidate_labels: List[str],
+        candidate_labels: list[str],
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
         # deprecated argument
-        labels: List[str] = None,  # type: ignore
-    ) -> List[ZeroShotImageClassificationOutputElement]:
+        labels: list[str] = None,  # type: ignore
+    ) -> list[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.

         Args:
             image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
                 The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
-            candidate_labels (`List[str]`):
+            candidate_labels (`list[str]`):
                 The candidate labels for this image
-            labels (`List[str]`, *optional*):
+            labels (`list[str]`, *optional*):
                 (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed

@@ -3154,12 +3245,12 @@ class InferenceClient:
                 replacing the placeholder with the candidate labels.

         Returns:
-            `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
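A minimal sketch of `zero_shot_image_classification()` under the retyped signature above; the image URL and model ID are illustrative assumptions.

```python
# Hypothetical sketch of zero_shot_image_classification() with list[str] candidate labels.
from huggingface_hub import InferenceClient

client = InferenceClient()
predictions = client.zero_shot_image_classification(
    "https://example.com/cat.jpg",           # assumed URL, for illustration only
    candidate_labels=["cat", "dog", "bird"],
    model="openai/clip-vit-base-patch32",    # assumed model ID, for illustration only
)
for prediction in predictions:  # list[ZeroShotImageClassificationOutputElement]
    print(prediction.label, prediction.score)
```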
@@ -3193,102 +3284,7 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
-        in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-            ```python
-            >>> from huggingface_hub import InferenceClient
-            >>> client = InferenceClient()
-
-            # Discover zero-shot-classification models currently deployed
-            >>> models = client.list_deployed_models()
-            >>> models["zero-shot-classification"]
-            ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-            # List from only 1 framework
-            >>> client.list_deployed_models("text-generation-inference")
-            {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-            ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Model running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
-    def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+    def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
         """
         Get information about the deployed endpoint.
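The removed `list_deployed_models()` pointed callers to `HfApi.list_models(..., inference_provider='...')` in its deprecation message. Below is a minimal sketch of that replacement path; the provider name, task filter, and limit are illustrative assumptions rather than values taken from this diff.

```python
# Hypothetical replacement sketch based on the deprecation message of the removed
# list_deployed_models(): list warm models per provider via HfApi.list_models().
from huggingface_hub import HfApi

api = HfApi()
models = api.list_models(
    inference_provider="hf-inference",        # assumed provider name, for illustration only
    pipeline_tag="zero-shot-classification",  # optional task filter (assumption)
    limit=10,
)
for model in models:
    print(model.id)
```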
@@ -3301,7 +3297,7 @@ class InferenceClient:
             Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

         Returns:
-            `Dict[str, Any]`: Information about the endpoint.
+            `dict[str, Any]`: Information about the endpoint.

         Example:
             ```py
@@ -3351,7 +3347,6 @@ class InferenceClient:
         Check the health of the deployed endpoint.

         Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

         Args:
             model (`str`, *optional*):
@@ -3375,75 +3370,12 @@ class InferenceClient:
         if model is None:
             raise ValueError("Model id not provided.")
         if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
         url = model.rstrip("/") + "/health"

         response = get_session().get(url, headers=build_hf_headers(token=self.token))
         return response.status_code == 200

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-            ```py
-            >>> from huggingface_hub import InferenceClient
-            >>> client = InferenceClient()
-            >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-            ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-            ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
     @property
     def chat(self) -> "ProxyClientChat":
         return ProxyClientChat(self)