huggingface-hub 0.33.5__py3-none-any.whl → 0.35.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +487 -525
- huggingface_hub/_commit_api.py +21 -28
- huggingface_hub/_jobs_api.py +145 -0
- huggingface_hub/_local_folder.py +7 -1
- huggingface_hub/_login.py +5 -5
- huggingface_hub/_oauth.py +1 -1
- huggingface_hub/_snapshot_download.py +11 -6
- huggingface_hub/_upload_large_folder.py +46 -23
- huggingface_hub/cli/__init__.py +27 -0
- huggingface_hub/cli/_cli_utils.py +69 -0
- huggingface_hub/cli/auth.py +210 -0
- huggingface_hub/cli/cache.py +405 -0
- huggingface_hub/cli/download.py +181 -0
- huggingface_hub/cli/hf.py +66 -0
- huggingface_hub/cli/jobs.py +522 -0
- huggingface_hub/cli/lfs.py +198 -0
- huggingface_hub/cli/repo.py +243 -0
- huggingface_hub/cli/repo_files.py +128 -0
- huggingface_hub/cli/system.py +52 -0
- huggingface_hub/cli/upload.py +316 -0
- huggingface_hub/cli/upload_large_folder.py +132 -0
- huggingface_hub/commands/_cli_utils.py +5 -0
- huggingface_hub/commands/delete_cache.py +3 -1
- huggingface_hub/commands/download.py +4 -0
- huggingface_hub/commands/env.py +3 -0
- huggingface_hub/commands/huggingface_cli.py +2 -0
- huggingface_hub/commands/repo.py +4 -0
- huggingface_hub/commands/repo_files.py +4 -0
- huggingface_hub/commands/scan_cache.py +3 -1
- huggingface_hub/commands/tag.py +3 -1
- huggingface_hub/commands/upload.py +4 -0
- huggingface_hub/commands/upload_large_folder.py +3 -1
- huggingface_hub/commands/user.py +11 -1
- huggingface_hub/commands/version.py +3 -0
- huggingface_hub/constants.py +1 -0
- huggingface_hub/file_download.py +16 -5
- huggingface_hub/hf_api.py +519 -7
- huggingface_hub/hf_file_system.py +8 -16
- huggingface_hub/hub_mixin.py +3 -3
- huggingface_hub/inference/_client.py +38 -39
- huggingface_hub/inference/_common.py +38 -11
- huggingface_hub/inference/_generated/_async_client.py +50 -51
- huggingface_hub/inference/_generated/types/__init__.py +1 -0
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_mcp/cli.py +36 -18
- huggingface_hub/inference/_mcp/constants.py +8 -0
- huggingface_hub/inference/_mcp/types.py +3 -0
- huggingface_hub/inference/_providers/__init__.py +4 -1
- huggingface_hub/inference/_providers/_common.py +3 -6
- huggingface_hub/inference/_providers/fal_ai.py +85 -42
- huggingface_hub/inference/_providers/hf_inference.py +17 -9
- huggingface_hub/inference/_providers/replicate.py +19 -1
- huggingface_hub/keras_mixin.py +2 -2
- huggingface_hub/repocard.py +1 -1
- huggingface_hub/repository.py +2 -2
- huggingface_hub/utils/_auth.py +1 -1
- huggingface_hub/utils/_cache_manager.py +2 -2
- huggingface_hub/utils/_dotenv.py +51 -0
- huggingface_hub/utils/_headers.py +1 -1
- huggingface_hub/utils/_runtime.py +1 -1
- huggingface_hub/utils/_xet.py +6 -2
- huggingface_hub/utils/_xet_progress_reporting.py +141 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/METADATA +7 -8
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/RECORD +68 -51
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/entry_points.txt +1 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/top_level.txt +0 -0
@@ -118,9 +118,7 @@ class AsyncInferenceClient:
             or a URL to a deployed Inference Endpoint. Defaults to None, in which case a recommended model is
             automatically selected for the task.
             Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
-            arguments are mutually exclusive. If
-            path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
-            documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
+            arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
         provider (`str`, *optional*):
             Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"replicate"`, `"sambanova"` or `"together"`.
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
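The rewritten docstring above states that when a URL is passed as `model` or `base_url`, the client appends the `(/v1)/chat/completions` suffix for chat completion. A minimal sketch of that usage, assuming a locally deployed OpenAI-compatible endpoint (the URL is a placeholder, not part of this diff):

```python
# Sketch only: point the client at an OpenAI-compatible / TGI endpoint.
# The client appends "/v1/chat/completions" to the base URL itself.
from huggingface_hub import InferenceClient

client = InferenceClient(base_url="http://localhost:8080")  # or model="http://localhost:8080"
response = client.chat_completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    max_tokens=32,
)
print(response.choices[0].message.content)
```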
@@ -1199,8 +1197,8 @@ class AsyncInferenceClient:
         Perform image classification on the given image using the specified model.

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The image to classify. It can be raw bytes, an image file,
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The image to classify. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             model (`str`, *optional*):
                 The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
                 deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
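This hunk, like several others below, widens the accepted `image` types to include `PIL.Image.Image`. An illustrative sketch (the file name is a placeholder and Pillow must be installed):

```python
# Sketch only: the image argument may now be a PIL.Image.Image as well as bytes, a path, or a URL.
from PIL import Image
from huggingface_hub import InferenceClient

client = InferenceClient()
img = Image.open("cat.png")  # placeholder local file
predictions = client.image_classification(img)  # a path or an https:// URL still works too
for item in predictions:
    print(item.label, item.score)
```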
@@ -1258,8 +1256,8 @@ class AsyncInferenceClient:
         </Tip>

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The image to segment. It can be raw bytes, an image file,
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The image to segment. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             model (`str`, *optional*):
                 The model to use for image segmentation. Can be a model ID hosted on the Hugging Face Hub or a URL to a
                 deployed Inference Endpoint. If not provided, the default recommended model for image segmentation will be used.
@@ -1331,8 +1329,8 @@ class AsyncInferenceClient:
         </Tip>

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The input image for translation. It can be raw bytes, an image file,
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image for translation. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             prompt (`str`, *optional*):
                 The text prompt to guide the image generation.
             negative_prompt (`str`, *optional*):
@@ -1384,6 +1382,7 @@ class AsyncInferenceClient:
             api_key=self.token,
         )
         response = await self._inner_post(request_parameters)
+        response = provider_helper.get_response(response, request_parameters)
         return _bytes_to_image(response)

     async def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
@@ -1394,8 +1393,8 @@ class AsyncInferenceClient:
         (OCR), Pix2Struct, etc). Please have a look to the model card to learn more about a model's specificities.

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The input image to caption. It can be raw bytes, an image file,
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
@@ -1446,8 +1445,8 @@ class AsyncInferenceClient:
         </Tip>

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The image to detect objects on. It can be raw bytes, an image file,
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The image to detect objects on. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             model (`str`, *optional*):
                 The model to use for object detection. Can be a model ID hosted on the Hugging Face Hub or a URL to a
                 deployed Inference Endpoint. If not provided, the default recommended model for object detection (DETR) will be used.
@@ -1913,23 +1912,23 @@ class AsyncInferenceClient:
         return TextClassificationOutputElement.parse_obj_as_list(response)[0]  # type: ignore [return-value]

     @overload
-    async def text_generation(
+    async def text_generation(
         self,
         prompt: str,
         *,
-        details: Literal[
-        stream: Literal[
+        details: Literal[True],
+        stream: Literal[True],
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
         stop: Optional[List[str]] = None,
         stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
@@ -1940,26 +1939,26 @@ class AsyncInferenceClient:
         truncate: Optional[int] = None,
         typical_p: Optional[float] = None,
         watermark: Optional[bool] = None,
-    ) ->
+    ) -> AsyncIterable[TextGenerationStreamOutput]: ...

     @overload
-    async def text_generation(
+    async def text_generation(
         self,
         prompt: str,
         *,
-        details: Literal[True]
-        stream: Literal[False] =
+        details: Literal[True],
+        stream: Optional[Literal[False]] = None,
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
         stop: Optional[List[str]] = None,
         stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
@@ -1973,23 +1972,23 @@ class AsyncInferenceClient:
     ) -> TextGenerationOutput: ...

     @overload
-    async def text_generation(
+    async def text_generation(
         self,
         prompt: str,
         *,
-        details: Literal[False] =
-        stream: Literal[True]
+        details: Optional[Literal[False]] = None,
+        stream: Literal[True],
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,  # Manual default value
         seed: Optional[int] = None,
         stop: Optional[List[str]] = None,
         stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
@@ -2003,23 +2002,23 @@ class AsyncInferenceClient:
     ) -> AsyncIterable[str]: ...

     @overload
-    async def text_generation(
+    async def text_generation(
         self,
         prompt: str,
         *,
-        details: Literal[
-        stream: Literal[
+        details: Optional[Literal[False]] = None,
+        stream: Optional[Literal[False]] = None,
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
         stop: Optional[List[str]] = None,
         stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
@@ -2030,26 +2029,26 @@ class AsyncInferenceClient:
         truncate: Optional[int] = None,
         typical_p: Optional[float] = None,
         watermark: Optional[bool] = None,
-    ) ->
+    ) -> str: ...

     @overload
     async def text_generation(
         self,
         prompt: str,
         *,
-        details:
-        stream: bool =
+        details: Optional[bool] = None,
+        stream: Optional[bool] = None,
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
         stop: Optional[List[str]] = None,
         stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
@@ -2060,25 +2059,25 @@ class AsyncInferenceClient:
         truncate: Optional[int] = None,
         typical_p: Optional[float] = None,
         watermark: Optional[bool] = None,
-    ) -> Union[TextGenerationOutput, AsyncIterable[TextGenerationStreamOutput]]: ...
+    ) -> Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]: ...

     async def text_generation(
         self,
         prompt: str,
         *,
-        details: bool =
-        stream: bool =
+        details: Optional[bool] = None,
+        stream: Optional[bool] = None,
         model: Optional[str] = None,
         # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
         adapter_id: Optional[str] = None,
         best_of: Optional[int] = None,
         decoder_input_details: Optional[bool] = None,
-        do_sample: Optional[bool] =
+        do_sample: Optional[bool] = None,
         frequency_penalty: Optional[float] = None,
         grammar: Optional[TextGenerationInputGrammarType] = None,
         max_new_tokens: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
-        return_full_text: Optional[bool] =
+        return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
         stop: Optional[List[str]] = None,
         stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
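The overloads above relax `details` and `stream` to `Optional[...] = None` while keeping the same four return shapes. A small sketch of how the flag combinations map to return types (shown with the synchronous client for brevity; the async client mirrors it):

```python
# Sketch only: the return type of text_generation depends on details/stream.
from huggingface_hub import InferenceClient

client = InferenceClient()
prompt = "The huggingface_hub library is "

text = client.text_generation(prompt, max_new_tokens=12)                   # -> str
details = client.text_generation(prompt, details=True, max_new_tokens=12)  # -> TextGenerationOutput
for chunk in client.text_generation(prompt, stream=True, max_new_tokens=12):
    print(chunk)  # -> str chunks; with details=True, TextGenerationStreamOutput chunks
```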
@@ -2156,7 +2155,7 @@ class AsyncInferenceClient:
             typical_p (`float`, *optional*):
                 Typical Decoding mass
                 See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
-            watermark (`bool`, *optional
+            watermark (`bool`, *optional*):
                 Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)

         Returns:
@@ -2307,7 +2306,7 @@ class AsyncInferenceClient:
             "repetition_penalty": repetition_penalty,
             "return_full_text": return_full_text,
             "seed": seed,
-            "stop": stop
+            "stop": stop,
             "temperature": temperature,
             "top_k": top_k,
             "top_n_tokens": top_n_tokens,
@@ -2361,7 +2360,7 @@ class AsyncInferenceClient:

         # Handle errors separately for more precise error messages
         try:
-            bytes_output = await self._inner_post(request_parameters, stream=stream)
+            bytes_output = await self._inner_post(request_parameters, stream=stream or False)
         except _import_aiohttp().ClientResponseError as e:
             match = MODEL_KWARGS_NOT_USED_REGEX.search(e.response_error_payload["error"])
             if e.status == 400 and match:
@@ -3034,8 +3033,8 @@ class AsyncInferenceClient:
         Answering open-ended questions based on an image.

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The input image for the context. It can be raw bytes, an image file,
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image for the context. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             question (`str`):
                 Question to be answered.
             model (`str`, *optional*):
@@ -3204,8 +3203,8 @@ class AsyncInferenceClient:
         Provide input image and text labels to predict text labels for the image.

         Args:
-            image (`Union[str, Path, bytes, BinaryIO]`):
-                The input image to caption. It can be raw bytes, an image file,
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
             candidate_labels (`List[str]`):
                 The candidate labels for this image
             labels (`List[str]`, *optional*):
@@ -85,6 +85,7 @@ from .image_to_text import (
     ImageToTextOutput,
     ImageToTextParameters,
 )
+from .image_to_video import ImageToVideoInput, ImageToVideoOutput, ImageToVideoParameters, ImageToVideoTargetSize
 from .object_detection import (
     ObjectDetectionBoundingBox,
     ObjectDetectionInput,
@@ -0,0 +1,60 @@
+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from typing import Any, Optional
+
+from .base import BaseInferenceType, dataclass_with_extra
+
+
+@dataclass_with_extra
+class ImageToVideoTargetSize(BaseInferenceType):
+    """The size in pixel of the output video frames."""
+
+    height: int
+    width: int
+
+
+@dataclass_with_extra
+class ImageToVideoParameters(BaseInferenceType):
+    """Additional inference parameters for Image To Video"""
+
+    guidance_scale: Optional[float] = None
+    """For diffusion models. A higher guidance scale value encourages the model to generate
+    videos closely linked to the text prompt at the expense of lower image quality.
+    """
+    negative_prompt: Optional[str] = None
+    """One prompt to guide what NOT to include in video generation."""
+    num_frames: Optional[float] = None
+    """The num_frames parameter determines how many video frames are generated."""
+    num_inference_steps: Optional[int] = None
+    """The number of denoising steps. More denoising steps usually lead to a higher quality
+    video at the expense of slower inference.
+    """
+    prompt: Optional[str] = None
+    """The text prompt to guide the video generation."""
+    seed: Optional[int] = None
+    """Seed for the random number generator."""
+    target_size: Optional[ImageToVideoTargetSize] = None
+    """The size in pixel of the output video frames."""
+
+
+@dataclass_with_extra
+class ImageToVideoInput(BaseInferenceType):
+    """Inputs for Image To Video inference"""
+
+    inputs: str
+    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+    also provide the image data as a raw bytes payload.
+    """
+    parameters: Optional[ImageToVideoParameters] = None
+    """Additional inference parameters for Image To Video"""
+
+
+@dataclass_with_extra
+class ImageToVideoOutput(BaseInferenceType):
+    """Outputs of inference for the Image To Video task"""
+
+    video: Any
+    """The generated video returned as raw bytes in the payload."""
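The new generated module only defines dataclasses for the image-to-video task. A sketch of building a request payload with them (the base64 string is a placeholder; how these types are wired to a provider is outside this hunk):

```python
# Sketch only: constructing the generated image-to-video dataclasses.
from huggingface_hub.inference._generated.types import (
    ImageToVideoInput,
    ImageToVideoParameters,
    ImageToVideoTargetSize,
)

params = ImageToVideoParameters(
    prompt="a cat waving at the camera",
    num_frames=48,
    target_size=ImageToVideoTargetSize(height=512, width=512),
)
request = ImageToVideoInput(inputs="<base64-encoded image>", parameters=params)
```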
@@ -71,6 +71,8 @@ async def run_agent(
     signal.signal(signal.SIGINT, lambda *_: _sigint_handler())

     # Handle inputs (i.e. env variables injection)
+    resolved_inputs: dict[str, str] = {}
+
     if len(inputs) > 0:
         print(
             "[bold blue]Some initial inputs are required by the agent. "
@@ -79,19 +81,26 @@ async def run_agent(
         for input_item in inputs:
             input_id = input_item["id"]
             description = input_item["description"]
-            env_special_value = "${input:
+            env_special_value = f"${{input:{input_id}}}"

-            # Check
-
+            # Check if the input is used by any server or as an apiKey
+            input_usages = set()
             for server in servers:
                 # Check stdio's "env" and http/sse's "headers" mappings
                 env_or_headers = server.get("env", {}) if server["type"] == "stdio" else server.get("headers", {})
                 for key, value in env_or_headers.items():
                     if env_special_value in value:
-
+                        input_usages.add(key)
+
+            raw_api_key = config.get("apiKey")
+            if isinstance(raw_api_key, str) and env_special_value in raw_api_key:
+                input_usages.add("apiKey")

-            if not
-                print(
+            if not input_usages:
+                print(
+                    f"[yellow]Input '{input_id}' defined in config but not used by any server or as an API key."
+                    " Skipping.[/yellow]"
+                )
                 continue

             # Prompt user for input
@@ -104,30 +113,39 @@ async def run_agent(
             if exit_event.is_set():
                 return

-            #
+            # Fallback to environment variable when user left blank
+            final_value = user_input
+            if not final_value:
+                final_value = os.getenv(env_variable_key, "")
+                if final_value:
+                    print(f"[green]Value successfully loaded from '{env_variable_key}'[/green]")
+                else:
+                    print(
+                        f"[yellow]No value found for '{env_variable_key}' in environment variables. Continuing.[/yellow]"
+                    )
+            resolved_inputs[input_id] = final_value
+
+            # Inject resolved value (can be empty) into stdio's env or http/sse's headers
             for server in servers:
                 env_or_headers = server.get("env", {}) if server["type"] == "stdio" else server.get("headers", {})
                 for key, value in env_or_headers.items():
                     if env_special_value in value:
-
-                        env_or_headers[key] = env_or_headers[key].replace(env_special_value, user_input)
-                    else:
-                        value_from_env = os.getenv(env_variable_key, "")
-                        env_or_headers[key] = env_or_headers[key].replace(env_special_value, value_from_env)
-                        if value_from_env:
-                            print(f"[green]Value successfully loaded from '{env_variable_key}'[/green]")
-                        else:
-                            print(
-                                f"[yellow]No value found for '{env_variable_key}' in environment variables. Continuing.[/yellow]"
-                            )
+                        env_or_headers[key] = env_or_headers[key].replace(env_special_value, final_value)

         print()

+    raw_api_key = config.get("apiKey")
+    if isinstance(raw_api_key, str):
+        substituted_api_key = raw_api_key
+        for input_id, val in resolved_inputs.items():
+            substituted_api_key = substituted_api_key.replace(f"${{input:{input_id}}}", val)
+        config["apiKey"] = substituted_api_key
     # Main agent loop
     async with Agent(
         provider=config.get("provider"),  # type: ignore[arg-type]
         model=config.get("model"),
         base_url=config.get("endpointUrl"),  # type: ignore[arg-type]
+        api_key=config.get("apiKey"),
         servers=servers,  # type: ignore[arg-type]
         prompt=prompt,
     ) as agent:
@@ -54,6 +54,10 @@ TASK_COMPLETE_TOOL: ChatCompletionInputTool = ChatCompletionInputTool.parse_obj(
         "function": {
             "name": "task_complete",
             "description": "Call this tool when the task given by the user is complete",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+            },
         },
     }
 )
@@ -64,6 +68,10 @@ ASK_QUESTION_TOOL: ChatCompletionInputTool = ChatCompletionInputTool.parse_obj(
         "function": {
             "name": "ask_question",
             "description": "Ask the user for more info required to solve or clarify their problem.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+            },
         },
     }
 )
@@ -1,5 +1,7 @@
 from typing import Dict, List, Literal, TypedDict, Union

+from typing_extensions import NotRequired
+

 class InputConfig(TypedDict, total=False):
     id: str
@@ -35,5 +37,6 @@ ServerConfig = Union[StdioServerConfig, HTTPServerConfig, SSEServerConfig]
 class AgentConfig(TypedDict):
     model: str
     provider: str
+    apiKey: NotRequired[str]
     inputs: List[InputConfig]
     servers: List[ServerConfig]
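`AgentConfig` gains an optional `apiKey` field, which the CLI changes earlier in this diff resolve against `${input:<id>}` placeholders. A hedged sketch of an agent configuration shaped like these TypedDicts (all concrete values are invented for illustration):

```python
# Sketch only: an AgentConfig-shaped dict with an ${input:...} placeholder for the API key.
agent_config = {
    "model": "meta-llama/Llama-3.3-70B-Instruct",  # illustrative model id
    "provider": "nebius",                          # illustrative provider name
    "apiKey": "${input:my-api-key}",               # resolved from the prompted input or an env var
    "inputs": [
        {"id": "my-api-key", "description": "API key for the inference provider"},
    ],
    "servers": [],  # stdio/http/sse MCP server definitions would go here
}
```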
@@ -12,6 +12,7 @@ from .cerebras import CerebrasConversationalTask
 from .cohere import CohereConversationalTask
 from .fal_ai import (
     FalAIAutomaticSpeechRecognitionTask,
+    FalAIImageToImageTask,
     FalAITextToImageTask,
     FalAITextToSpeechTask,
     FalAITextToVideoTask,
@@ -34,7 +35,7 @@ from .nebius import (
 from .novita import NovitaConversationalTask, NovitaTextGenerationTask, NovitaTextToVideoTask
 from .nscale import NscaleConversationalTask, NscaleTextToImageTask
 from .openai import OpenAIConversationalTask
-from .replicate import ReplicateTask, ReplicateTextToImageTask, ReplicateTextToSpeechTask
+from .replicate import ReplicateImageToImageTask, ReplicateTask, ReplicateTextToImageTask, ReplicateTextToSpeechTask
 from .sambanova import SambanovaConversationalTask, SambanovaFeatureExtractionTask
 from .together import TogetherConversationalTask, TogetherTextGenerationTask, TogetherTextToImageTask

@@ -78,6 +79,7 @@ PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
         "text-to-image": FalAITextToImageTask(),
         "text-to-speech": FalAITextToSpeechTask(),
         "text-to-video": FalAITextToVideoTask(),
+        "image-to-image": FalAIImageToImageTask(),
     },
     "featherless-ai": {
         "conversational": FeatherlessConversationalTask(),
@@ -141,6 +143,7 @@ PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
         "conversational": OpenAIConversationalTask(),
     },
     "replicate": {
+        "image-to-image": ReplicateImageToImageTask(),
         "text-to-image": ReplicateTextToImageTask(),
         "text-to-speech": ReplicateTextToSpeechTask(),
         "text-to-video": ReplicateTask("text-to-video"),
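With `FalAIImageToImageTask` and `ReplicateImageToImageTask` registered above, image-to-image calls can be routed to those providers. A minimal sketch (the image path and model id are placeholders; which models each provider actually serves is not part of this diff):

```python
# Sketch only: routing an image-to-image call through one of the newly registered providers.
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai")  # or provider="replicate"
edited = client.image_to_image(
    "cat.png",                                  # placeholder input (path, URL, bytes, or PIL Image)
    prompt="turn the cat into a tiger",
    model="some-org/some-image-editing-model",  # placeholder model id
)
edited.save("tiger.png")  # the method returns a PIL Image
```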
@@ -51,9 +51,6 @@ def filter_none(obj: Union[Dict[str, Any], List[Any]]) -> Union[Dict[str, Any],
             continue
         if isinstance(v, (dict, list)):
             v = filter_none(v)
-        # remove empty nested dicts
-        if isinstance(v, dict) and not v:
-            continue
         cleaned[k] = v
     return cleaned

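The deletion above means `filter_none` now keeps empty nested dicts and only drops `None` values. A self-contained sketch of the resulting behaviour for dicts, re-implemented here for illustration rather than imported from the private `_common` module:

```python
# Sketch only: post-change filter_none behaviour for dicts (None dropped, empty dicts kept).
from typing import Any, Dict


def filter_none_sketch(obj: Dict[str, Any]) -> Dict[str, Any]:
    cleaned: Dict[str, Any] = {}
    for k, v in obj.items():
        if v is None:
            continue  # drop None values
        if isinstance(v, dict):
            v = filter_none_sketch(v)  # recurse, but keep the dict even if it ends up empty
        cleaned[k] = v
    return cleaned


print(filter_none_sketch({"a": None, "extra_body": {"b": None}}))
# -> {'extra_body': {}}  (previously the empty nested dict would have been removed)
```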
@@ -101,7 +98,7 @@ class TaskProviderHelper:
         # prepare payload (to customize in subclasses)
         payload = self._prepare_payload_as_dict(inputs, parameters, provider_mapping_info=provider_mapping_info)
         if payload is not None:
-            payload = recursive_merge(payload, extra_payload or {})
+            payload = recursive_merge(payload, filter_none(extra_payload or {}))

         # body data (to customize in subclasses)
         data = self._prepare_payload_as_bytes(inputs, parameters, provider_mapping_info, extra_payload)
@@ -134,7 +131,7 @@ class TaskProviderHelper:
         api_key = get_token()
         if api_key is None:
             raise ValueError(
-                f"You must provide an api_key to work with {self.provider} API or log in with `
+                f"You must provide an api_key to work with {self.provider} API or log in with `hf auth login`."
             )
         return api_key

@@ -270,7 +267,7 @@ class BaseTextGenerationTask(TaskProviderHelper):
     def _prepare_payload_as_dict(
         self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
     ) -> Optional[Dict]:
-        return {"prompt": inputs, **
+        return filter_none({"prompt": inputs, **parameters, "model": provider_mapping_info.provider_id})


 @lru_cache(maxsize=None)