huggingface-hub 0.34.4__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of huggingface-hub has been flagged as potentially problematic.
- huggingface_hub/__init__.py +46 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +167 -10
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +13 -14
- huggingface_hub/_upload_large_folder.py +15 -15
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/_cli_utils.py +2 -2
- huggingface_hub/cli/auth.py +5 -6
- huggingface_hub/cli/cache.py +14 -20
- huggingface_hub/cli/download.py +4 -4
- huggingface_hub/cli/jobs.py +560 -11
- huggingface_hub/cli/lfs.py +4 -4
- huggingface_hub/cli/repo.py +7 -7
- huggingface_hub/cli/repo_files.py +2 -2
- huggingface_hub/cli/upload.py +4 -4
- huggingface_hub/cli/upload_large_folder.py +3 -3
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +13 -13
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +5 -6
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -19
- huggingface_hub/errors.py +42 -29
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +153 -252
- huggingface_hub/hf_api.py +815 -600
- huggingface_hub/hf_file_system.py +98 -62
- huggingface_hub/hub_mixin.py +37 -57
- huggingface_hub/inference/_client.py +177 -325
- huggingface_hub/inference/_common.py +110 -124
- huggingface_hub/inference/_generated/_async_client.py +226 -432
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +2 -3
- huggingface_hub/inference/_mcp/mcp_client.py +58 -30
- huggingface_hub/inference/_mcp/types.py +10 -7
- huggingface_hub/inference/_mcp/utils.py +11 -7
- huggingface_hub/inference/_providers/__init__.py +2 -2
- huggingface_hub/inference/_providers/_common.py +49 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +28 -20
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +20 -31
- huggingface_hub/repocard.py +18 -18
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +4 -4
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +366 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +15 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +32 -11
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
- huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.34.4.dist-info/RECORD +0 -166
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_client.py

@@ -34,18 +34,17 @@
 # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
 import base64
 import logging
+import os
 import re
 import warnings
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload
-
-from requests import HTTPError
+from contextlib import ExitStack
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
 
 from huggingface_hub import constants
-from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
+from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
-    ModelStatus,
     RequestParameters,
     _b64_encode,
     _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
     _bytes_to_list,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _open_as_binary,
     _set_unsupported_text_generation_kwargs,
     _stream_chat_completion_response,
     _stream_text_generation_response,
@@ -103,9 +101,13 @@ from huggingface_hub.inference._generated.types import (
     ZeroShotImageClassificationOutputElement,
 )
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
-from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
+from huggingface_hub.utils import (
+    build_hf_headers,
+    get_session,
+    hf_raise_for_status,
+    validate_hf_hub_args,
+)
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method
 
 
 if TYPE_CHECKING:
@@ -142,16 +144,14 @@ class InferenceClient:
             arguments are mutually exclusive and have the exact same behavior.
         timeout (`float`, `optional`):
             The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-        headers (`Dict[str, str]`, `optional`):
+        headers (`dict[str, str]`, `optional`):
             Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
             Values in this dictionary will override the default values.
         bill_to (`str`, `optional`):
            The billing account to use for the requests. By default the requests are billed on the user's account.
            Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-        cookies (`Dict[str, str]`, `optional`):
+        cookies (`dict[str, str]`, `optional`):
            Additional cookies to send to the server.
-        proxies (`Any`, `optional`):
-            Proxies to use for the request.
         base_url (`str`, `optional`):
            Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
            follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -160,6 +160,7 @@ class InferenceClient:
            follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
     """
 
+    @validate_hf_hub_args
     def __init__(
         self,
         model: Optional[str] = None,
@@ -167,9 +168,8 @@ class InferenceClient:
         provider: Optional[PROVIDER_OR_POLICY_T] = None,
         token: Optional[str] = None,
         timeout: Optional[float] = None,
-        headers: Optional[Dict[str, str]] = None,
-        cookies: Optional[Dict[str, str]] = None,
-        proxies: Optional[Any] = None,
+        headers: Optional[dict[str, str]] = None,
+        cookies: Optional[dict[str, str]] = None,
         bill_to: Optional[str] = None,
         # OpenAI compatibility
         base_url: Optional[str] = None,
@@ -231,11 +231,21 @@ class InferenceClient:
 
         self.cookies = cookies
         self.timeout = timeout
-        self.proxies = proxies
+
+        self.exit_stack = ExitStack()
 
     def __repr__(self):
         return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.exit_stack.close()
+
+    def close(self):
+        self.exit_stack.close()
+
     @overload
     def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
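The hunk above introduces an internal `ExitStack` plus `__enter__`/`__exit__`/`close()`, making `InferenceClient` usable as a context manager. A minimal sketch of both cleanup styles (nothing here beyond what the diff adds):

```python
from huggingface_hub import InferenceClient

# Style 1: explicit cleanup via the new close() method.
client = InferenceClient()
try:
    pass  # ... make inference calls ...
finally:
    client.close()  # releases anything held on the internal ExitStack

# Style 2: the new context-manager protocol does the same on exit.
with InferenceClient() as client:
    pass  # ... make inference calls ...
```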
@@ -244,45 +254,46 @@
     @overload
     def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-    ) -> Iterable[bytes]: ...
+    ) -> Iterable[str]: ...
 
     @overload
     def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, Iterable[bytes]]: ...
+    ) -> Union[bytes, Iterable[str]]: ...
 
     def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, Iterable[bytes]]:
+    ) -> Union[bytes, Iterable[str]]:
         """Make a request to the inference server."""
         # TODO: this should be handled in provider helpers directly
         if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
             request_parameters.headers["Accept"] = "image/png"
 
-        with _open_as_binary(request_parameters.data) as data_as_binary:
-            try:
-                response = get_session().post(
+        try:
+            response = self.exit_stack.enter_context(
+                get_session().stream(
+                    "POST",
                     request_parameters.url,
                     json=request_parameters.json,
-                    data=data_as_binary,
+                    content=request_parameters.data,
                     headers=request_parameters.headers,
                     cookies=self.cookies,
                     timeout=self.timeout,
-                    stream=stream,
-                    proxies=self.proxies,
                 )
-            except TimeoutError as error:
-                # Convert any `TimeoutError` to a `InferenceTimeoutError`
-                raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
-
-        try:
+            )
             hf_raise_for_status(response)
-            return response.iter_lines() if stream else response.content
-        except HTTPError as error:
+            if stream:
+                return response.iter_lines()
+            else:
+                return response.read()
+        except TimeoutError as error:
+            # Convert any `TimeoutError` to a `InferenceTimeoutError`
+            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
+        except HfHubHTTPError as error:
             if error.response.status_code == 422 and request_parameters.task != "unknown":
                 msg = str(error.args[0])
                 if len(error.response.text) > 0:
-                    msg += f"\n{error.response.text}\n"
+                    msg += f"{os.linesep}{error.response.text}{os.linesep}"
                 error.args = (msg,) + error.args[1:]
             raise
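The rewritten `_inner_post` swaps the `requests`-style `get_session().post(..., stream=..., proxies=...)` call for an httpx-style `get_session().stream(...)` entered on the client's `ExitStack`, so streamed responses stay open until the client is closed and non-streaming calls read the body via `response.read()`. A standalone sketch of that pattern, assuming a plain `httpx.Client` in place of the library's session helper (the URL and payload are placeholders):

```python
import httpx
from contextlib import ExitStack

exit_stack = ExitStack()
client = httpx.Client()

# client.stream() returns a context manager; entering it on the ExitStack
# keeps the response open until exit_stack.close() is called.
response = exit_stack.enter_context(
    client.stream("POST", "https://example.invalid/infer", json={"inputs": "hi"})
)
try:
    response.raise_for_status()
    body = response.read()           # non-streaming: full body as bytes
    # lines = response.iter_lines()  # streaming: decoded str lines instead
finally:
    exit_stack.close()  # closes the streamed response
```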
@@ -293,7 +304,7 @@ class InferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-    ) -> List[AudioClassificationOutputElement]:
+    ) -> list[AudioClassificationOutputElement]:
         """
         Perform audio classification on the provided audio content.
 
@@ -311,12 +322,12 @@ class InferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
-            `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -348,7 +359,7 @@ class InferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-    ) -> List[AudioToAudioOutputElement]:
+    ) -> list[AudioToAudioOutputElement]:
         """
         Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
 
@@ -362,12 +373,12 @@ class InferenceClient:
                 audio_to_audio will be used.
 
         Returns:
-            `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
+            `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
 
         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -400,7 +411,7 @@ class InferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -411,7 +422,7 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-            extra_body (`Dict`, *optional*):
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -420,7 +431,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -446,105 +457,105 @@ class InferenceClient:
     @overload
     def chat_completion(  # type: ignore
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[False] = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> ChatCompletionOutput: ...
 
     @overload
     def chat_completion(  # type: ignore
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[True] = True,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> Iterable[ChatCompletionStreamOutput]: ...
 
     @overload
     def chat_completion(
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...
 
     def chat_completion(
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         # Parameters from ChatCompletionInput (handled manually)
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.
@@ -574,7 +585,7 @@ class InferenceClient:
             frequency_penalty (`float`, *optional*):
                 Penalizes new tokens based on their existing frequency
                 in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-            logit_bias (`List[float]`, *optional*):
+            logit_bias (`list[float]`, *optional*):
                 Adjusts the likelihood of specific tokens appearing in the generated output.
             logprobs (`bool`, *optional*):
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -590,7 +601,7 @@ class InferenceClient:
                 Grammar constraints. Can be either a JSONSchema or a regex.
             seed (Optional[`int`], *optional*):
                 Seed for reproducible control flow. Defaults to None.
-            stop (`List[str]`, *optional*):
+            stop (`list[str]`, *optional*):
                 Up to four strings which trigger the end of the response.
                 Defaults to None.
             stream (`bool`, *optional*):
@@ -614,7 +625,7 @@ class InferenceClient:
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-            extra_body (`Dict`, *optional*):
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -626,7 +637,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
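The four `chat_completion` signatures above only swap the `typing.List`/`typing.Dict` annotations for builtin generics; call sites are unchanged. A short sketch against the new annotations (the model ID is illustrative):

```python
from huggingface_hub import InferenceClient

with InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct") as client:
    output = client.chat_completion(
        messages=[{"role": "user", "content": "Say hello."}],  # plain dicts still accepted
        max_tokens=32,
        stop=["\n\n"],  # a plain list[str], matching the new annotation
    )
    print(output.choices[0].message.content)
```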
@@ -940,8 +951,8 @@ class InferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-        word_boxes: Optional[List[Union[List[float], str]]] = None,
-    ) -> List[DocumentQuestionAnsweringOutputElement]:
+        word_boxes: Optional[list[Union[list[float], str]]] = None,
+    ) -> list[DocumentQuestionAnsweringOutputElement]:
         """
         Answer questions on document images.
 
@@ -971,16 +982,16 @@ class InferenceClient:
             top_k (`int`, *optional*):
                 The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                 answers if there are not enough options available within the context.
-            word_boxes (`List[Union[List[float], str`, *optional*):
+            word_boxes (`list[Union[list[float], str`, *optional*):
                 A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                 step and use the provided bounding boxes instead.
         Returns:
-            `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
+            `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
 
@@ -994,7 +1005,7 @@ class InferenceClient:
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-        inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
+        inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
         request_parameters = provider_helper.prepare_request(
             inputs=inputs,
             parameters={
@@ -1055,7 +1066,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1092,9 +1103,9 @@ class InferenceClient:
         text: str,
         *,
         model: Optional[str] = None,
-        targets: Optional[List[str]] = None,
+        targets: Optional[list[str]] = None,
         top_k: Optional[int] = None,
-    ) -> List[FillMaskOutputElement]:
+    ) -> list[FillMaskOutputElement]:
         """
         Fill in a hole with a missing word (token to be precise).
 
@@ -1104,20 +1115,20 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`List[str`, *optional*):
+            targets (`list[str`, *optional*):
                 When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                 vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                 resulting token will be used (with a warning, and that might be slower).
             top_k (`int`, *optional*):
                 When passed, overrides the number of predictions to return.
         Returns:
-            `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
+            `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
             probability, token reference, and completed text.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1150,7 +1161,7 @@ class InferenceClient:
         model: Optional[str] = None,
         function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
         top_k: Optional[int] = None,
-    ) -> List[ImageClassificationOutputElement]:
+    ) -> list[ImageClassificationOutputElement]:
         """
         Perform image classification on the given image using the specified model.
 
@@ -1165,12 +1176,12 @@ class InferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
         Returns:
-            `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
+            `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1202,7 +1213,7 @@ class InferenceClient:
         overlap_mask_area_threshold: Optional[float] = None,
         subtask: Optional["ImageSegmentationSubtask"] = None,
         threshold: Optional[float] = None,
-    ) -> List[ImageSegmentationOutputElement]:
+    ) -> list[ImageSegmentationOutputElement]:
         """
         Perform image segmentation on the given image using the specified model.
 
@@ -1227,12 +1238,12 @@ class InferenceClient:
             threshold (`float`, *optional*):
                 Probability threshold to filter out predicted masks.
         Returns:
-            `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
+            `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1309,7 +1320,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1439,7 +1450,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1462,12 +1473,12 @@ class InferenceClient:
             api_key=self.token,
         )
         response = self._inner_post(request_parameters)
-        output = ImageToTextOutput.parse_obj(response)
-        return output[0] if isinstance(output, list) else output
+        output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+        return output_list[0]
 
     def object_detection(
         self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-    ) -> List[ObjectDetectionOutputElement]:
+    ) -> list[ObjectDetectionOutputElement]:
         """
         Perform object detection on the given image using the specified model.
 
@@ -1486,12 +1497,12 @@ class InferenceClient:
             threshold (`float`, *optional*):
                 The probability necessary to make a prediction.
         Returns:
-            `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
+            `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If the request output is not a List.
@@ -1529,7 +1540,7 @@ class InferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-    ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
+    ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
         """
         Retrieve the answer to a question from a given text.
 
@@ -1561,13 +1572,13 @@ class InferenceClient:
                 topk answers if there are not enough options available within the context.
 
         Returns:
-            Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
+            Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                 When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                 When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1601,15 +1612,15 @@ class InferenceClient:
         return output
 
     def sentence_similarity(
-        self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
-    ) -> List[float]:
+        self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+    ) -> list[float]:
         """
         Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
 
         Args:
             sentence (`str`):
                 The main sentence to compare to others.
-            other_sentences (`List[str]`):
+            other_sentences (`list[str]`):
                 The list of sentences to compare to.
             model (`str`, *optional*):
                 The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1617,12 +1628,12 @@ class InferenceClient:
                 Defaults to None.
 
         Returns:
-            `List[float]`: The embedding representing the input text.
+            `list[float]`: The embedding representing the input text.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1659,7 +1670,7 @@ class InferenceClient:
         *,
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
-        generate_parameters: Optional[Dict[str, Any]] = None,
+        generate_parameters: Optional[dict[str, Any]] = None,
         truncation: Optional["SummarizationTruncationStrategy"] = None,
     ) -> SummarizationOutput:
         """
@@ -1673,7 +1684,7 @@ class InferenceClient:
                 Inference Endpoint. If not provided, the default recommended model for summarization will be used.
             clean_up_tokenization_spaces (`bool`, *optional*):
                 Whether to clean up the potential extra spaces in the text output.
-            generate_parameters (`Dict[str, Any]`, *optional*):
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
             truncation (`"SummarizationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
@@ -1683,7 +1694,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1713,7 +1724,7 @@ class InferenceClient:
 
     def table_question_answering(
         self,
-        table: Dict[str, Any],
+        table: dict[str, Any],
         query: str,
         *,
         model: Optional[str] = None,
@@ -1748,7 +1759,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1773,12 +1784,12 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
 
-    def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
+    def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
         """
         Classifying a target category (a group) based on a set of attributes.
 
         Args:
-            table (`Dict[str, Any]`):
+            table (`dict[str, Any]`):
                 Set of attributes to classify.
             model (`str`, *optional*):
                 The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1791,7 +1802,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1828,12 +1839,12 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return _bytes_to_list(response)
 
-    def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
+    def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
         """
         Predicting a numerical target value given a set of attributes/features in a table.
 
         Args:
-            table (`Dict[str, Any]`):
+            table (`dict[str, Any]`):
                 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
             model (`str`, *optional*):
                 The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1846,7 +1857,7 @@ class InferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1885,7 +1896,7 @@ class InferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-    ) -> List[TextClassificationOutputElement]:
+    ) -> list[TextClassificationOutputElement]:
         """
         Perform text classification (e.g. sentiment-analysis) on the given text.
 
@@ -1902,12 +1913,12 @@ class InferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
-            `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
+            `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1955,8 +1966,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1985,8 +1996,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2015,8 +2026,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,  # Manual default value
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2045,8 +2056,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
        temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2075,8 +2086,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2104,8 +2115,8 @@ class InferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2161,9 +2172,9 @@ class InferenceClient:
                 Whether to prepend the prompt to the generated text
             seed (`int`, *optional*):
                 Random sampling seed
-            stop (`List[str]`, *optional*):
+            stop (`list[str]`, *optional*):
                 Stop generating tokens if a member of `stop` is generated.
-            stop_sequences (`List[str]`, *optional*):
+            stop_sequences (`list[str]`, *optional*):
                 Deprecated argument. Use `stop` instead.
             temperature (`float`, *optional*):
                 The value used to module the logits distribution.
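As the `text_generation` signatures above show, `stop` is the supported parameter name and `stop_sequences` survives only as a deprecated alias. A sketch of the preferred spelling (the model ID is illustrative):

```python
from huggingface_hub import InferenceClient

client = InferenceClient(model="gpt2")  # model ID illustrative
text = client.text_generation(
    "The capital of France is",
    max_new_tokens=5,
    stop=["."],              # supported name
    # stop_sequences=["."],  # deprecated alias, still accepted for now
)
client.close()
```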
@@ -2196,7 +2207,7 @@ class InferenceClient:
                 If input values are not valid. No HTTP call is made to the server.
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -2385,7 +2396,7 @@ class InferenceClient:
         # Handle errors separately for more precise error messages
         try:
             bytes_output = self._inner_post(request_parameters, stream=stream or False)
-        except HTTPError as e:
+        except HfHubHTTPError as e:
             match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
             if isinstance(e, BadRequestError) and match:
                 unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
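With `HTTPError` dropped from the imports, inference calls now surface [`HfHubHTTPError`] instead of `requests.HTTPError`, so callers catching the old type need updating. A sketch of the new handling (the model ID is illustrative):

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError

with InferenceClient() as client:
    try:
        client.text_generation("Hello", model="gpt2")  # model ID illustrative
    except HfHubHTTPError as err:  # 0.34.x raised requests.HTTPError here
        print(err.response.status_code, err.response.text)
```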
@@ -2440,7 +2451,7 @@ class InferenceClient:
|
|
|
2440
2451
|
model: Optional[str] = None,
|
|
2441
2452
|
scheduler: Optional[str] = None,
|
|
2442
2453
|
seed: Optional[int] = None,
|
|
2443
|
-
extra_body: Optional[
|
|
2454
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2444
2455
|
) -> "Image":
|
|
2445
2456
|
"""
|
|
2446
2457
|
Generate an image based on a given text using a specified model.
|
|
@@ -2478,7 +2489,7 @@ class InferenceClient:
|
|
|
2478
2489
|
Override the scheduler with a compatible one.
|
|
2479
2490
|
seed (`int`, *optional*):
|
|
2480
2491
|
Seed for the random number generator.
|
|
2481
|
-
extra_body (`
|
|
2492
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2482
2493
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2483
2494
|
for supported parameters.
|
|
2484
2495
|
|
|
@@ -2488,7 +2499,7 @@ class InferenceClient:
|
|
|
2488
2499
|
Raises:
|
|
2489
2500
|
[`InferenceTimeoutError`]:
|
|
2490
2501
|
If the model is unavailable or the request times out.
|
|
2491
|
-
`
|
|
2502
|
+
[`HfHubHTTPError`]:
|
|
2492
2503
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2493
2504
|
|
|
2494
2505
|
Example:
|
|
@@ -2577,11 +2588,11 @@ class InferenceClient:
|
|
|
2577
2588
|
*,
|
|
2578
2589
|
model: Optional[str] = None,
|
|
2579
2590
|
guidance_scale: Optional[float] = None,
|
|
2580
|
-
negative_prompt: Optional[
|
|
2591
|
+
negative_prompt: Optional[list[str]] = None,
|
|
2581
2592
|
num_frames: Optional[float] = None,
|
|
2582
2593
|
num_inference_steps: Optional[int] = None,
|
|
2583
2594
|
seed: Optional[int] = None,
|
|
2584
|
-
extra_body: Optional[
|
|
2595
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2585
2596
|
) -> bytes:
|
|
2586
2597
|
"""
|
|
2587
2598
|
Generate a video based on a given text.
|
|
@@ -2600,7 +2611,7 @@ class InferenceClient:
|
|
|
2600
2611
|
guidance_scale (`float`, *optional*):
|
|
2601
2612
|
A higher guidance scale value encourages the model to generate videos closely linked to the text
|
|
2602
2613
|
prompt, but values too high may cause saturation and other artifacts.
|
|
2603
|
-
negative_prompt (`
|
|
2614
|
+
negative_prompt (`list[str]`, *optional*):
|
|
2604
2615
|
One or several prompt to guide what NOT to include in video generation.
|
|
2605
2616
|
num_frames (`float`, *optional*):
|
|
2606
2617
|
The num_frames parameter determines how many video frames are generated.
|
|
@@ -2609,7 +2620,7 @@ class InferenceClient:
|
|
|
2609
2620
|
expense of slower inference.
|
|
2610
2621
|
seed (`int`, *optional*):
|
|
2611
2622
|
Seed for the random number generator.
|
|
2612
|
-
extra_body (`
|
|
2623
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2613
2624
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2614
2625
|
for supported parameters.
|
|
2615
2626
|
|
|
@@ -2689,7 +2700,7 @@ class InferenceClient:
|
|
|
2689
2700
|
top_p: Optional[float] = None,
|
|
2690
2701
|
typical_p: Optional[float] = None,
|
|
2691
2702
|
use_cache: Optional[bool] = None,
|
|
2692
|
-
extra_body: Optional[
|
|
2703
|
+
extra_body: Optional[dict[str, Any]] = None,
|
|
2693
2704
|
) -> bytes:
|
|
2694
2705
|
"""
|
|
2695
2706
|
Synthesize an audio of a voice pronouncing a given text.
|
|
@@ -2751,7 +2762,7 @@ class InferenceClient:
|
|
|
2751
2762
|
paper](https://hf.co/papers/2202.00666) for more details.
|
|
2752
2763
|
use_cache (`bool`, *optional*):
|
|
2753
2764
|
Whether the model should use the past last key/values attentions to speed up decoding
|
|
2754
|
-
extra_body (`
|
|
2765
|
+
extra_body (`dict[str, Any]`, *optional*):
|
|
2755
2766
|
Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
|
|
2756
2767
|
for supported parameters.
|
|
2757
2768
|
Returns:
|
|
@@ -2760,7 +2771,7 @@ class InferenceClient:
|
|
|
2760
2771
|
Raises:
|
|
2761
2772
|
[`InferenceTimeoutError`]:
|
|
2762
2773
|
If the model is unavailable or the request times out.
|
|
2763
|
-
`
|
|
2774
|
+
[`HfHubHTTPError`]:
|
|
2764
2775
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2765
2776
|
|
|
2766
2777
|
Example:
|
|
@@ -2883,9 +2894,9 @@ class InferenceClient:
|
|
|
2883
2894
|
*,
|
|
2884
2895
|
model: Optional[str] = None,
|
|
2885
2896
|
aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
|
|
2886
|
-
ignore_labels: Optional[
|
|
2897
|
+
ignore_labels: Optional[list[str]] = None,
|
|
2887
2898
|
stride: Optional[int] = None,
|
|
2888
|
-
) ->
|
|
2899
|
+
) -> list[TokenClassificationOutputElement]:
|
|
2889
2900
|
"""
|
|
2890
2901
|
Perform token classification on the given text.
|
|
2891
2902
|
Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
|
|
@@ -2899,18 +2910,18 @@ class InferenceClient:
|
|
|
2899
2910
|
Defaults to None.
|
|
2900
2911
|
aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
|
|
2901
2912
|
The strategy used to fuse tokens based on model predictions
|
|
2902
|
-
ignore_labels (`
|
|
2913
|
+
ignore_labels (`list[str`, *optional*):
|
|
2903
2914
|
A list of labels to ignore
|
|
2904
2915
|
stride (`int`, *optional*):
|
|
2905
2916
|
The number of overlapping tokens between chunks when splitting the input text.
|
|
2906
2917
|
|
|
2907
2918
|
Returns:
|
|
2908
|
-
`
|
|
2919
|
+
`list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
|
|
2909
2920
|
|
|
2910
2921
|
Raises:
|
|
2911
2922
|
[`InferenceTimeoutError`]:
|
|
2912
2923
|
If the model is unavailable or the request times out.
|
|
2913
|
-
`
|
|
2924
|
+
[`HfHubHTTPError`]:
|
|
2914
2925
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
2915
2926
|
|
|
2916
2927
|
Example:
|
|
@@ -2961,7 +2972,7 @@ class InferenceClient:
|
|
|
2961
2972
|
tgt_lang: Optional[str] = None,
|
|
2962
2973
|
clean_up_tokenization_spaces: Optional[bool] = None,
|
|
2963
2974
|
truncation: Optional["TranslationTruncationStrategy"] = None,
|
|
2964
|
-
generate_parameters: Optional[
|
|
2975
|
+
generate_parameters: Optional[dict[str, Any]] = None,
|
|
2965
2976
|
) -> TranslationOutput:
|
|
2966
2977
|
"""
|
|
2967
2978
|
Convert text from one language to another.
|
|
@@ -2986,7 +2997,7 @@ class InferenceClient:
|
|
|
2986
2997
|
Whether to clean up the potential extra spaces in the text output.
|
|
2987
2998
|
truncation (`"TranslationTruncationStrategy"`, *optional*):
|
|
2988
2999
|
The truncation strategy to use.
|
|
2989
|
-
generate_parameters (`
|
|
3000
|
+
generate_parameters (`dict[str, Any]`, *optional*):
|
|
2990
3001
|
Additional parametrization of the text generation algorithm.
|
|
2991
3002
|
|
|
2992
3003
|
Returns:
|
|
@@ -2995,7 +3006,7 @@ class InferenceClient:
|
|
|
2995
3006
|
Raises:
|
|
2996
3007
|
[`InferenceTimeoutError`]:
|
|
2997
3008
|
If the model is unavailable or the request times out.
|
|
2998
|
-
`
|
|
3009
|
+
[`HfHubHTTPError`]:
|
|
2999
3010
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
3000
3011
|
`ValueError`:
|
|
3001
3012
|
If only one of the `src_lang` and `tgt_lang` arguments are provided.
|
|
@@ -3048,7 +3059,7 @@ class InferenceClient:
|
|
|
3048
3059
|
*,
|
|
3049
3060
|
model: Optional[str] = None,
|
|
3050
3061
|
top_k: Optional[int] = None,
|
|
3051
|
-
) ->
|
|
3062
|
+
) -> list[VisualQuestionAnsweringOutputElement]:
|
|
3052
3063
|
"""
|
|
3053
3064
|
Answering open-ended questions based on an image.
|
|
3054
3065
|
|
|
@@ -3065,12 +3076,12 @@ class InferenceClient:
|
|
|
3065
3076
|
The number of answers to return (will be chosen by order of likelihood). Note that we return less than
|
|
3066
3077
|
topk answers if there are not enough options available within the context.
|
|
3067
3078
|
Returns:
|
|
3068
|
-
`
|
|
3079
|
+
`list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
|
|
3069
3080
|
|
|
3070
3081
|
Raises:
|
|
3071
3082
|
`InferenceTimeoutError`:
|
|
3072
3083
|
If the model is unavailable or the request times out.
|
|
3073
|
-
`
|
|
3084
|
+
[`HfHubHTTPError`]:
|
|
3074
3085
|
If the request fails with an HTTP error status code other than HTTP 503.
|
|
3075
3086
|
|
|
3076
3087
|
Example:
|
|
@@ -3103,21 +3114,21 @@ class InferenceClient:
     def zero_shot_classification(
         self,
         text: str,
-        candidate_labels: List[str],
+        candidate_labels: list[str],
         *,
         multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
-    ) -> List[ZeroShotClassificationOutputElement]:
+    ) -> list[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.

         Args:
             text (`str`):
                 The input text to classify.
-            candidate_labels (`List[str]`):
+            candidate_labels (`list[str]`):
                 The set of possible class labels to classify the text into.
-            labels (`List[str]`, *optional*):
+            labels (`list[str]`, *optional*):
                 (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
             multi_label (`bool`, *optional*):
                 Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3132,12 +3143,12 @@ class InferenceClient:


         Returns:
-            `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example with `multi_label=False`:
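The returned elements carry `label` and `score` fields; a sketch with an illustrative NLI model:

```python
# Illustrative sketch, not part of the diff.
from huggingface_hub import InferenceClient

client = InferenceClient(model="facebook/bart-large-mnli")
results = client.zero_shot_classification(
    "I just bought tickets for the championship game.",
    candidate_labels=["sports", "politics", "finance"],
    multi_label=False,  # scores are normalized to sum to 1 across labels
)
for result in results:
    print(result.label, round(result.score, 3))
```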
@@ -3209,22 +3220,22 @@ class InferenceClient:
     def zero_shot_image_classification(
         self,
         image: ContentT,
-        candidate_labels: List[str],
+        candidate_labels: list[str],
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
         # deprecated argument
-        labels: List[str] = None,  # type: ignore
-    ) -> List[ZeroShotImageClassificationOutputElement]:
+        labels: list[str] = None,  # type: ignore
+    ) -> list[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.

         Args:
             image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
                 The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
-            candidate_labels (`List[str]`):
+            candidate_labels (`list[str]`):
                 The candidate labels for this image
-            labels (`List[str]`, *optional*):
+            labels (`list[str]`, *optional*):
                 (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3234,12 +3245,12 @@ class InferenceClient:
             replacing the placeholder with the candidate labels.

         Returns:
-            `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.

         Example:
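A matching sketch for the image variant; model id and image URL are illustrative:

```python
# Illustrative sketch, not part of the diff.
from huggingface_hub import InferenceClient

client = InferenceClient(model="openai/clip-vit-base-patch32")
results = client.zero_shot_image_classification(
    "https://example.com/photo.jpg",  # bytes, file path, URL, or PIL image
    candidate_labels=["dog", "cat", "bird"],
)
print([(r.label, r.score) for r in results])
```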
@@ -3273,102 +3284,7 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know beforehand which framework you are interested
-        in, you can also restrict the search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-            ```python
-            >>> from huggingface_hub import InferenceClient
-            >>> client = InferenceClient()
-
-            # Discover zero-shot-classification models currently deployed
-            >>> models = client.list_deployed_models()
-            >>> models["zero-shot-classification"]
-            ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-            # List from only 1 framework
-            >>> client.list_deployed_models("text-generation-inference")
-            {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-            ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Model running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
-    def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+    def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
         """
         Get information about the deployed endpoint.

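`list_deployed_models` is dropped entirely in 1.0. Per the deprecation message above, discovery now goes through `HfApi.list_models` with the `inference_provider` filter; a migration sketch (the filter values are illustrative):

```python
# Migration sketch following the deprecation message quoted above.
from huggingface_hub import HfApi

api = HfApi()
warm_models = api.list_models(
    inference_provider="hf-inference",  # or any other provider name
    pipeline_tag="zero-shot-classification",
    limit=10,
)
for model in warm_models:
    print(model.id)
```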
@@ -3381,7 +3297,7 @@ class InferenceClient:
                 Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

         Returns:
-            `Dict[str, Any]`: Information about the endpoint.
+            `dict[str, Any]`: Information about the endpoint.

         Example:
             ```py
@@ -3431,7 +3347,6 @@ class InferenceClient:
         Check the health of the deployed endpoint.

         Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

         Args:
             model (`str`, *optional*):
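With `get_model_status` gone (see the removal below), `get_endpoint_info` and `health_check` are the remaining introspection helpers, and both now expect a TGI/TEI endpoint URL. A sketch, where the endpoint URL and the `model_id` key are illustrative assumptions:

```python
# Illustrative sketch, not part of the diff.
from huggingface_hub import InferenceClient

client = InferenceClient(model="https://my-endpoint.endpoints.huggingface.cloud")
if client.health_check():  # GET <endpoint>/health, True on HTTP 200
    info = client.get_endpoint_info()  # dict[str, Any] of endpoint metadata
    print(info.get("model_id"))
```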
@@ -3455,75 +3370,12 @@ class InferenceClient:
         if model is None:
             raise ValueError("Model id not provided.")
         if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
         url = model.rstrip("/") + "/health"

         response = get_session().get(url, headers=build_hf_headers(token=self.token))
         return response.status_code == 200

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for which the status will be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only the HF Inference API service can be checked, so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of the ModelStatus dataclass, containing information
-                about the state of the model: load, state, compute type and framework.
-
-        Example:
-            ```py
-            >>> from huggingface_hub import InferenceClient
-            >>> client = InferenceClient()
-            >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-            ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-            ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
     @property
     def chat(self) -> "ProxyClientChat":
         return ProxyClientChat(self)