huggingface-hub 0.33.5__py3-none-any.whl → 0.35.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of huggingface-hub has been flagged as potentially problematic by the registry.
Files changed (68)
  1. huggingface_hub/__init__.py +487 -525
  2. huggingface_hub/_commit_api.py +21 -28
  3. huggingface_hub/_jobs_api.py +145 -0
  4. huggingface_hub/_local_folder.py +7 -1
  5. huggingface_hub/_login.py +5 -5
  6. huggingface_hub/_oauth.py +1 -1
  7. huggingface_hub/_snapshot_download.py +11 -6
  8. huggingface_hub/_upload_large_folder.py +46 -23
  9. huggingface_hub/cli/__init__.py +27 -0
  10. huggingface_hub/cli/_cli_utils.py +69 -0
  11. huggingface_hub/cli/auth.py +210 -0
  12. huggingface_hub/cli/cache.py +405 -0
  13. huggingface_hub/cli/download.py +181 -0
  14. huggingface_hub/cli/hf.py +66 -0
  15. huggingface_hub/cli/jobs.py +522 -0
  16. huggingface_hub/cli/lfs.py +198 -0
  17. huggingface_hub/cli/repo.py +243 -0
  18. huggingface_hub/cli/repo_files.py +128 -0
  19. huggingface_hub/cli/system.py +52 -0
  20. huggingface_hub/cli/upload.py +316 -0
  21. huggingface_hub/cli/upload_large_folder.py +132 -0
  22. huggingface_hub/commands/_cli_utils.py +5 -0
  23. huggingface_hub/commands/delete_cache.py +3 -1
  24. huggingface_hub/commands/download.py +4 -0
  25. huggingface_hub/commands/env.py +3 -0
  26. huggingface_hub/commands/huggingface_cli.py +2 -0
  27. huggingface_hub/commands/repo.py +4 -0
  28. huggingface_hub/commands/repo_files.py +4 -0
  29. huggingface_hub/commands/scan_cache.py +3 -1
  30. huggingface_hub/commands/tag.py +3 -1
  31. huggingface_hub/commands/upload.py +4 -0
  32. huggingface_hub/commands/upload_large_folder.py +3 -1
  33. huggingface_hub/commands/user.py +11 -1
  34. huggingface_hub/commands/version.py +3 -0
  35. huggingface_hub/constants.py +1 -0
  36. huggingface_hub/file_download.py +16 -5
  37. huggingface_hub/hf_api.py +519 -7
  38. huggingface_hub/hf_file_system.py +8 -16
  39. huggingface_hub/hub_mixin.py +3 -3
  40. huggingface_hub/inference/_client.py +38 -39
  41. huggingface_hub/inference/_common.py +38 -11
  42. huggingface_hub/inference/_generated/_async_client.py +50 -51
  43. huggingface_hub/inference/_generated/types/__init__.py +1 -0
  44. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  45. huggingface_hub/inference/_mcp/cli.py +36 -18
  46. huggingface_hub/inference/_mcp/constants.py +8 -0
  47. huggingface_hub/inference/_mcp/types.py +3 -0
  48. huggingface_hub/inference/_providers/__init__.py +4 -1
  49. huggingface_hub/inference/_providers/_common.py +3 -6
  50. huggingface_hub/inference/_providers/fal_ai.py +85 -42
  51. huggingface_hub/inference/_providers/hf_inference.py +17 -9
  52. huggingface_hub/inference/_providers/replicate.py +19 -1
  53. huggingface_hub/keras_mixin.py +2 -2
  54. huggingface_hub/repocard.py +1 -1
  55. huggingface_hub/repository.py +2 -2
  56. huggingface_hub/utils/_auth.py +1 -1
  57. huggingface_hub/utils/_cache_manager.py +2 -2
  58. huggingface_hub/utils/_dotenv.py +51 -0
  59. huggingface_hub/utils/_headers.py +1 -1
  60. huggingface_hub/utils/_runtime.py +1 -1
  61. huggingface_hub/utils/_xet.py +6 -2
  62. huggingface_hub/utils/_xet_progress_reporting.py +141 -0
  63. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/METADATA +7 -8
  64. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/RECORD +68 -51
  65. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/entry_points.txt +1 -0
  66. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/LICENSE +0 -0
  67. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/WHEEL +0 -0
  68. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/top_level.txt +0 -0
@@ -118,9 +118,7 @@ class AsyncInferenceClient:
  or a URL to a deployed Inference Endpoint. Defaults to None, in which case a recommended model is
  automatically selected for the task.
  Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
- arguments are mutually exclusive. If using `base_url` for chat completion, the `/chat/completions` suffix
- path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
- documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
+ arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
  provider (`str`, *optional*):
  Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"replicate"`, "sambanova"` or `"together"`.
  Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
@@ -1199,8 +1197,8 @@ class AsyncInferenceClient:
  Perform image classification on the given image using the specified model.

  Args:
- image (`Union[str, Path, bytes, BinaryIO]`):
- The image to classify. It can be raw bytes, an image file, or a URL to an online image.
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+ The image to classify. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
  model (`str`, *optional*):
  The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
  deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
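The widening of the `image` parameter to accept `PIL.Image.Image` recurs across the image tasks below; one hedged sketch of what it enables (the file name is a placeholder and the default recommended model is used):

```python
# Sketch only: passing a PIL image directly instead of raw bytes or a file path.
from PIL import Image
from huggingface_hub import InferenceClient

image = Image.open("cat.png")  # placeholder local file
client = InferenceClient()
for prediction in client.image_classification(image):
    print(prediction.label, prediction.score)
```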
@@ -1258,8 +1256,8 @@ class AsyncInferenceClient:
  </Tip>

  Args:
- image (`Union[str, Path, bytes, BinaryIO]`):
- The image to segment. It can be raw bytes, an image file, or a URL to an online image.
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+ The image to segment. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
  model (`str`, *optional*):
  The model to use for image segmentation. Can be a model ID hosted on the Hugging Face Hub or a URL to a
  deployed Inference Endpoint. If not provided, the default recommended model for image segmentation will be used.
@@ -1331,8 +1329,8 @@ class AsyncInferenceClient:
  </Tip>

  Args:
- image (`Union[str, Path, bytes, BinaryIO]`):
- The input image for translation. It can be raw bytes, an image file, or a URL to an online image.
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+ The input image for translation. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
  prompt (`str`, *optional*):
  The text prompt to guide the image generation.
  negative_prompt (`str`, *optional*):
@@ -1384,6 +1382,7 @@ class AsyncInferenceClient:
  api_key=self.token,
  )
  response = await self._inner_post(request_parameters)
+ response = provider_helper.get_response(response, request_parameters)
  return _bytes_to_image(response)

  async def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
@@ -1394,8 +1393,8 @@ class AsyncInferenceClient:
  (OCR), Pix2Struct, etc). Please have a look to the model card to learn more about a model's specificities.

  Args:
- image (`Union[str, Path, bytes, BinaryIO]`):
- The input image to caption. It can be raw bytes, an image file, or a URL to an online image..
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+ The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
@@ -1446,8 +1445,8 @@ class AsyncInferenceClient:
  </Tip>

  Args:
- image (`Union[str, Path, bytes, BinaryIO]`):
- The image to detect objects on. It can be raw bytes, an image file, or a URL to an online image.
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+ The image to detect objects on. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
  model (`str`, *optional*):
  The model to use for object detection. Can be a model ID hosted on the Hugging Face Hub or a URL to a
  deployed Inference Endpoint. If not provided, the default recommended model for object detection (DETR) will be used.
@@ -1913,23 +1912,23 @@ class AsyncInferenceClient:
  return TextClassificationOutputElement.parse_obj_as_list(response)[0] # type: ignore [return-value]

  @overload
- async def text_generation( # type: ignore
+ async def text_generation(
  self,
  prompt: str,
  *,
- details: Literal[False] = ...,
- stream: Literal[False] = ...,
+ details: Literal[True],
+ stream: Literal[True],
  model: Optional[str] = None,
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
  adapter_id: Optional[str] = None,
  best_of: Optional[int] = None,
  decoder_input_details: Optional[bool] = None,
- do_sample: Optional[bool] = False, # Manual default value
+ do_sample: Optional[bool] = None,
  frequency_penalty: Optional[float] = None,
  grammar: Optional[TextGenerationInputGrammarType] = None,
  max_new_tokens: Optional[int] = None,
  repetition_penalty: Optional[float] = None,
- return_full_text: Optional[bool] = False, # Manual default value
+ return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
  stop: Optional[List[str]] = None,
  stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
@@ -1940,26 +1939,26 @@ class AsyncInferenceClient:
  truncate: Optional[int] = None,
  typical_p: Optional[float] = None,
  watermark: Optional[bool] = None,
- ) -> str: ...
+ ) -> AsyncIterable[TextGenerationStreamOutput]: ...

  @overload
- async def text_generation( # type: ignore
+ async def text_generation(
  self,
  prompt: str,
  *,
- details: Literal[True] = ...,
- stream: Literal[False] = ...,
+ details: Literal[True],
+ stream: Optional[Literal[False]] = None,
  model: Optional[str] = None,
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
  adapter_id: Optional[str] = None,
  best_of: Optional[int] = None,
  decoder_input_details: Optional[bool] = None,
- do_sample: Optional[bool] = False, # Manual default value
+ do_sample: Optional[bool] = None,
  frequency_penalty: Optional[float] = None,
  grammar: Optional[TextGenerationInputGrammarType] = None,
  max_new_tokens: Optional[int] = None,
  repetition_penalty: Optional[float] = None,
- return_full_text: Optional[bool] = False, # Manual default value
+ return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
  stop: Optional[List[str]] = None,
  stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
@@ -1973,23 +1972,23 @@ class AsyncInferenceClient:
  ) -> TextGenerationOutput: ...

  @overload
- async def text_generation( # type: ignore
+ async def text_generation(
  self,
  prompt: str,
  *,
- details: Literal[False] = ...,
- stream: Literal[True] = ...,
+ details: Optional[Literal[False]] = None,
+ stream: Literal[True],
  model: Optional[str] = None,
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
  adapter_id: Optional[str] = None,
  best_of: Optional[int] = None,
  decoder_input_details: Optional[bool] = None,
- do_sample: Optional[bool] = False, # Manual default value
+ do_sample: Optional[bool] = None,
  frequency_penalty: Optional[float] = None,
  grammar: Optional[TextGenerationInputGrammarType] = None,
  max_new_tokens: Optional[int] = None,
  repetition_penalty: Optional[float] = None,
- return_full_text: Optional[bool] = False, # Manual default value
+ return_full_text: Optional[bool] = None, # Manual default value
  seed: Optional[int] = None,
  stop: Optional[List[str]] = None,
  stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
@@ -2003,23 +2002,23 @@ class AsyncInferenceClient:
  ) -> AsyncIterable[str]: ...

  @overload
- async def text_generation( # type: ignore
+ async def text_generation(
  self,
  prompt: str,
  *,
- details: Literal[True] = ...,
- stream: Literal[True] = ...,
+ details: Optional[Literal[False]] = None,
+ stream: Optional[Literal[False]] = None,
  model: Optional[str] = None,
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
  adapter_id: Optional[str] = None,
  best_of: Optional[int] = None,
  decoder_input_details: Optional[bool] = None,
- do_sample: Optional[bool] = False, # Manual default value
+ do_sample: Optional[bool] = None,
  frequency_penalty: Optional[float] = None,
  grammar: Optional[TextGenerationInputGrammarType] = None,
  max_new_tokens: Optional[int] = None,
  repetition_penalty: Optional[float] = None,
- return_full_text: Optional[bool] = False, # Manual default value
+ return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
  stop: Optional[List[str]] = None,
  stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
@@ -2030,26 +2029,26 @@ class AsyncInferenceClient:
  truncate: Optional[int] = None,
  typical_p: Optional[float] = None,
  watermark: Optional[bool] = None,
- ) -> AsyncIterable[TextGenerationStreamOutput]: ...
+ ) -> str: ...

  @overload
  async def text_generation(
  self,
  prompt: str,
  *,
- details: Literal[True] = ...,
- stream: bool = ...,
+ details: Optional[bool] = None,
+ stream: Optional[bool] = None,
  model: Optional[str] = None,
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
  adapter_id: Optional[str] = None,
  best_of: Optional[int] = None,
  decoder_input_details: Optional[bool] = None,
- do_sample: Optional[bool] = False, # Manual default value
+ do_sample: Optional[bool] = None,
  frequency_penalty: Optional[float] = None,
  grammar: Optional[TextGenerationInputGrammarType] = None,
  max_new_tokens: Optional[int] = None,
  repetition_penalty: Optional[float] = None,
- return_full_text: Optional[bool] = False, # Manual default value
+ return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
  stop: Optional[List[str]] = None,
  stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
@@ -2060,25 +2059,25 @@ class AsyncInferenceClient:
  truncate: Optional[int] = None,
  typical_p: Optional[float] = None,
  watermark: Optional[bool] = None,
- ) -> Union[TextGenerationOutput, AsyncIterable[TextGenerationStreamOutput]]: ...
+ ) -> Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]: ...

  async def text_generation(
  self,
  prompt: str,
  *,
- details: bool = False,
- stream: bool = False,
+ details: Optional[bool] = None,
+ stream: Optional[bool] = None,
  model: Optional[str] = None,
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
  adapter_id: Optional[str] = None,
  best_of: Optional[int] = None,
  decoder_input_details: Optional[bool] = None,
- do_sample: Optional[bool] = False, # Manual default value
+ do_sample: Optional[bool] = None,
  frequency_penalty: Optional[float] = None,
  grammar: Optional[TextGenerationInputGrammarType] = None,
  max_new_tokens: Optional[int] = None,
  repetition_penalty: Optional[float] = None,
- return_full_text: Optional[bool] = False, # Manual default value
+ return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
  stop: Optional[List[str]] = None,
  stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
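Taken together, the reworked overloads encode the different return shapes of `text_generation`. A minimal sketch of the two most common call patterns (prompt text and token budgets are placeholders):

```python
# Sketch only: the return type follows the `details` / `stream` flags, as in the overloads above.
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()

    # stream=False, details=False -> plain string
    text = await client.text_generation("The capital of France is", max_new_tokens=10)
    print(text)

    # stream=True, details=True -> AsyncIterable[TextGenerationStreamOutput]
    async for chunk in await client.text_generation(
        "Once upon a time", stream=True, details=True, max_new_tokens=20
    ):
        print(chunk.token.text, end="")

asyncio.run(main())
```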
@@ -2156,7 +2155,7 @@ class AsyncInferenceClient:
  typical_p (`float`, *optional`):
  Typical Decoding mass
  See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
- watermark (`bool`, *optional`):
+ watermark (`bool`, *optional*):
  Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)

  Returns:
@@ -2307,7 +2306,7 @@ class AsyncInferenceClient:
  "repetition_penalty": repetition_penalty,
  "return_full_text": return_full_text,
  "seed": seed,
- "stop": stop if stop is not None else [],
+ "stop": stop,
  "temperature": temperature,
  "top_k": top_k,
  "top_n_tokens": top_n_tokens,
@@ -2361,7 +2360,7 @@ class AsyncInferenceClient:

  # Handle errors separately for more precise error messages
  try:
- bytes_output = await self._inner_post(request_parameters, stream=stream)
+ bytes_output = await self._inner_post(request_parameters, stream=stream or False)
  except _import_aiohttp().ClientResponseError as e:
  match = MODEL_KWARGS_NOT_USED_REGEX.search(e.response_error_payload["error"])
  if e.status == 400 and match:
@@ -3034,8 +3033,8 @@ class AsyncInferenceClient:
  Answering open-ended questions based on an image.

  Args:
- image (`Union[str, Path, bytes, BinaryIO]`):
- The input image for the context. It can be raw bytes, an image file, or a URL to an online image.
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+ The input image for the context. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
  question (`str`):
  Question to be answered.
  model (`str`, *optional*):
@@ -3204,8 +3203,8 @@ class AsyncInferenceClient:
  Provide input image and text labels to predict text labels for the image.

  Args:
- image (`Union[str, Path, bytes, BinaryIO]`):
- The input image to caption. It can be raw bytes, an image file, or a URL to an online image.
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+ The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
  candidate_labels (`List[str]`):
  The candidate labels for this image
  labels (`List[str]`, *optional*):
@@ -85,6 +85,7 @@ from .image_to_text import (
  ImageToTextOutput,
  ImageToTextParameters,
  )
+ from .image_to_video import ImageToVideoInput, ImageToVideoOutput, ImageToVideoParameters, ImageToVideoTargetSize
  from .object_detection import (
  ObjectDetectionBoundingBox,
  ObjectDetectionInput,
@@ -0,0 +1,60 @@
+ # Inference code generated from the JSON schema spec in @huggingface/tasks.
+ #
+ # See:
+ # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+ # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+ from typing import Any, Optional
+
+ from .base import BaseInferenceType, dataclass_with_extra
+
+
+ @dataclass_with_extra
+ class ImageToVideoTargetSize(BaseInferenceType):
+ """The size in pixel of the output video frames."""
+
+ height: int
+ width: int
+
+
+ @dataclass_with_extra
+ class ImageToVideoParameters(BaseInferenceType):
+ """Additional inference parameters for Image To Video"""
+
+ guidance_scale: Optional[float] = None
+ """For diffusion models. A higher guidance scale value encourages the model to generate
+ videos closely linked to the text prompt at the expense of lower image quality.
+ """
+ negative_prompt: Optional[str] = None
+ """One prompt to guide what NOT to include in video generation."""
+ num_frames: Optional[float] = None
+ """The num_frames parameter determines how many video frames are generated."""
+ num_inference_steps: Optional[int] = None
+ """The number of denoising steps. More denoising steps usually lead to a higher quality
+ video at the expense of slower inference.
+ """
+ prompt: Optional[str] = None
+ """The text prompt to guide the video generation."""
+ seed: Optional[int] = None
+ """Seed for the random number generator."""
+ target_size: Optional[ImageToVideoTargetSize] = None
+ """The size in pixel of the output video frames."""
+
+
+ @dataclass_with_extra
+ class ImageToVideoInput(BaseInferenceType):
+ """Inputs for Image To Video inference"""
+
+ inputs: str
+ """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+ also provide the image data as a raw bytes payload.
+ """
+ parameters: Optional[ImageToVideoParameters] = None
+ """Additional inference parameters for Image To Video"""
+
+
+ @dataclass_with_extra
+ class ImageToVideoOutput(BaseInferenceType):
+ """Outputs of inference for the Image To Video task"""
+
+ video: Any
+ """The generated video returned as raw bytes in the payload."""
@@ -71,6 +71,8 @@ async def run_agent(
  signal.signal(signal.SIGINT, lambda *_: _sigint_handler())

  # Handle inputs (i.e. env variables injection)
+ resolved_inputs: dict[str, str] = {}
+
  if len(inputs) > 0:
  print(
  "[bold blue]Some initial inputs are required by the agent. "
@@ -79,19 +81,26 @@ async def run_agent(
  for input_item in inputs:
  input_id = input_item["id"]
  description = input_item["description"]
- env_special_value = "${input:" + input_id + "}" # Special value to indicate env variable injection
+ env_special_value = f"${{input:{input_id}}}"

- # Check env variables that will use this input
- input_vars = set()
+ # Check if the input is used by any server or as an apiKey
+ input_usages = set()
  for server in servers:
  # Check stdio's "env" and http/sse's "headers" mappings
  env_or_headers = server.get("env", {}) if server["type"] == "stdio" else server.get("headers", {})
  for key, value in env_or_headers.items():
  if env_special_value in value:
- input_vars.add(key)
+ input_usages.add(key)
+
+ raw_api_key = config.get("apiKey")
+ if isinstance(raw_api_key, str) and env_special_value in raw_api_key:
+ input_usages.add("apiKey")

- if not input_vars:
- print(f"[yellow]Input {input_id} defined in config but not used by any server.[/yellow]")
+ if not input_usages:
+ print(
+ f"[yellow]Input '{input_id}' defined in config but not used by any server or as an API key."
+ " Skipping.[/yellow]"
+ )
  continue

  # Prompt user for input
@@ -104,30 +113,39 @@ async def run_agent(
  if exit_event.is_set():
  return

- # Inject user input (or env variable) into stdio's env or http/sse's headers
+ # Fallback to environment variable when user left blank
+ final_value = user_input
+ if not final_value:
+ final_value = os.getenv(env_variable_key, "")
+ if final_value:
+ print(f"[green]Value successfully loaded from '{env_variable_key}'[/green]")
+ else:
+ print(
+ f"[yellow]No value found for '{env_variable_key}' in environment variables. Continuing.[/yellow]"
+ )
+ resolved_inputs[input_id] = final_value
+
+ # Inject resolved value (can be empty) into stdio's env or http/sse's headers
  for server in servers:
  env_or_headers = server.get("env", {}) if server["type"] == "stdio" else server.get("headers", {})
  for key, value in env_or_headers.items():
  if env_special_value in value:
- if user_input:
- env_or_headers[key] = env_or_headers[key].replace(env_special_value, user_input)
- else:
- value_from_env = os.getenv(env_variable_key, "")
- env_or_headers[key] = env_or_headers[key].replace(env_special_value, value_from_env)
- if value_from_env:
- print(f"[green]Value successfully loaded from '{env_variable_key}'[/green]")
- else:
- print(
- f"[yellow]No value found for '{env_variable_key}' in environment variables. Continuing.[/yellow]"
- )
+ env_or_headers[key] = env_or_headers[key].replace(env_special_value, final_value)

  print()

+ raw_api_key = config.get("apiKey")
+ if isinstance(raw_api_key, str):
+ substituted_api_key = raw_api_key
+ for input_id, val in resolved_inputs.items():
+ substituted_api_key = substituted_api_key.replace(f"${{input:{input_id}}}", val)
+ config["apiKey"] = substituted_api_key
  # Main agent loop
  async with Agent(
  provider=config.get("provider"), # type: ignore[arg-type]
  model=config.get("model"),
  base_url=config.get("endpointUrl"), # type: ignore[arg-type]
+ api_key=config.get("apiKey"),
  servers=servers, # type: ignore[arg-type]
  prompt=prompt,
  ) as agent:
@@ -54,6 +54,10 @@ TASK_COMPLETE_TOOL: ChatCompletionInputTool = ChatCompletionInputTool.parse_obj(
  "function": {
  "name": "task_complete",
  "description": "Call this tool when the task given by the user is complete",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ },
  },
  }
  )
@@ -64,6 +68,10 @@ ASK_QUESTION_TOOL: ChatCompletionInputTool = ChatCompletionInputTool.parse_obj(
  "function": {
  "name": "ask_question",
  "description": "Ask the user for more info required to solve or clarify their problem.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ },
  },
  }
  )
@@ -1,5 +1,7 @@
  from typing import Dict, List, Literal, TypedDict, Union

+ from typing_extensions import NotRequired
+

  class InputConfig(TypedDict, total=False):
  id: str
@@ -35,5 +37,6 @@ ServerConfig = Union[StdioServerConfig, HTTPServerConfig, SSEServerConfig]
  class AgentConfig(TypedDict):
  model: str
  provider: str
+ apiKey: NotRequired[str]
  inputs: List[InputConfig]
  servers: List[ServerConfig]
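A hedged sketch of an agent config exercising the new optional `apiKey` field together with the `${input:...}` placeholder mechanism handled in the `run_agent` hunks above (model id, input id, and server details are illustrative; other stdio server fields are omitted):

```python
# Sketch only: an AgentConfig-shaped dict. "${input:hf_token}" is resolved at startup from the
# user prompt, falling back to an environment variable when left blank, per the logic above.
config = {
    "model": "Qwen/Qwen2.5-72B-Instruct",  # placeholder model id
    "provider": "auto",
    "apiKey": "${input:hf_token}",
    "inputs": [
        {"id": "hf_token", "description": "Hugging Face token used as API key"},
    ],
    "servers": [
        {
            "type": "stdio",
            # ...other stdio server fields omitted...
            "env": {"HF_TOKEN": "${input:hf_token}"},
        },
    ],
}
```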
@@ -12,6 +12,7 @@ from .cerebras import CerebrasConversationalTask
  from .cohere import CohereConversationalTask
  from .fal_ai import (
  FalAIAutomaticSpeechRecognitionTask,
+ FalAIImageToImageTask,
  FalAITextToImageTask,
  FalAITextToSpeechTask,
  FalAITextToVideoTask,
@@ -34,7 +35,7 @@ from .nebius import (
  from .novita import NovitaConversationalTask, NovitaTextGenerationTask, NovitaTextToVideoTask
  from .nscale import NscaleConversationalTask, NscaleTextToImageTask
  from .openai import OpenAIConversationalTask
- from .replicate import ReplicateTask, ReplicateTextToImageTask, ReplicateTextToSpeechTask
+ from .replicate import ReplicateImageToImageTask, ReplicateTask, ReplicateTextToImageTask, ReplicateTextToSpeechTask
  from .sambanova import SambanovaConversationalTask, SambanovaFeatureExtractionTask
  from .together import TogetherConversationalTask, TogetherTextGenerationTask, TogetherTextToImageTask

@@ -78,6 +79,7 @@ PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
  "text-to-image": FalAITextToImageTask(),
  "text-to-speech": FalAITextToSpeechTask(),
  "text-to-video": FalAITextToVideoTask(),
+ "image-to-image": FalAIImageToImageTask(),
  },
  "featherless-ai": {
  "conversational": FeatherlessConversationalTask(),
@@ -141,6 +143,7 @@ PROVIDERS: Dict[PROVIDER_T, Dict[str, TaskProviderHelper]] = {
  "conversational": OpenAIConversationalTask(),
  },
  "replicate": {
+ "image-to-image": ReplicateImageToImageTask(),
  "text-to-image": ReplicateTextToImageTask(),
  "text-to-speech": ReplicateTextToSpeechTask(),
  "text-to-video": ReplicateTask("text-to-video"),
@@ -51,9 +51,6 @@ def filter_none(obj: Union[Dict[str, Any], List[Any]]) -> Union[Dict[str, Any],
  continue
  if isinstance(v, (dict, list)):
  v = filter_none(v)
- # remove empty nested dicts
- if isinstance(v, dict) and not v:
- continue
  cleaned[k] = v
  return cleaned

@@ -101,7 +98,7 @@ class TaskProviderHelper:
  # prepare payload (to customize in subclasses)
  payload = self._prepare_payload_as_dict(inputs, parameters, provider_mapping_info=provider_mapping_info)
  if payload is not None:
- payload = recursive_merge(payload, extra_payload or {})
+ payload = recursive_merge(payload, filter_none(extra_payload or {}))

  # body data (to customize in subclasses)
  data = self._prepare_payload_as_bytes(inputs, parameters, provider_mapping_info, extra_payload)
@@ -134,7 +131,7 @@
  api_key = get_token()
  if api_key is None:
  raise ValueError(
- f"You must provide an api_key to work with {self.provider} API or log in with `huggingface-cli login`."
+ f"You must provide an api_key to work with {self.provider} API or log in with `hf auth login`."
  )
  return api_key

@@ -270,7 +267,7 @@ class BaseTextGenerationTask(TaskProviderHelper):
  def _prepare_payload_as_dict(
  self, inputs: Any, parameters: Dict, provider_mapping_info: InferenceProviderMapping
  ) -> Optional[Dict]:
- return {"prompt": inputs, **filter_none(parameters), "model": provider_mapping_info.provider_id}
+ return filter_none({"prompt": inputs, **parameters, "model": provider_mapping_info.provider_id})


  @lru_cache(maxsize=None)