huggingface-hub 0.35.0rc0__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of huggingface-hub might be problematic.

Files changed (50)
  1. huggingface_hub/__init__.py +19 -1
  2. huggingface_hub/_jobs_api.py +168 -12
  3. huggingface_hub/_local_folder.py +1 -1
  4. huggingface_hub/_oauth.py +5 -9
  5. huggingface_hub/_tensorboard_logger.py +9 -10
  6. huggingface_hub/_upload_large_folder.py +108 -1
  7. huggingface_hub/cli/auth.py +4 -1
  8. huggingface_hub/cli/cache.py +7 -9
  9. huggingface_hub/cli/hf.py +2 -5
  10. huggingface_hub/cli/jobs.py +591 -13
  11. huggingface_hub/cli/repo.py +10 -4
  12. huggingface_hub/commands/delete_cache.py +2 -2
  13. huggingface_hub/commands/scan_cache.py +1 -1
  14. huggingface_hub/dataclasses.py +3 -0
  15. huggingface_hub/file_download.py +12 -10
  16. huggingface_hub/hf_api.py +549 -95
  17. huggingface_hub/hf_file_system.py +4 -10
  18. huggingface_hub/hub_mixin.py +5 -3
  19. huggingface_hub/inference/_client.py +98 -181
  20. huggingface_hub/inference/_common.py +72 -70
  21. huggingface_hub/inference/_generated/_async_client.py +116 -201
  22. huggingface_hub/inference/_generated/types/chat_completion.py +2 -0
  23. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  24. huggingface_hub/inference/_mcp/cli.py +1 -1
  25. huggingface_hub/inference/_mcp/constants.py +1 -1
  26. huggingface_hub/inference/_mcp/mcp_client.py +28 -11
  27. huggingface_hub/inference/_mcp/types.py +3 -0
  28. huggingface_hub/inference/_mcp/utils.py +7 -3
  29. huggingface_hub/inference/_providers/__init__.py +13 -0
  30. huggingface_hub/inference/_providers/_common.py +29 -4
  31. huggingface_hub/inference/_providers/black_forest_labs.py +1 -1
  32. huggingface_hub/inference/_providers/fal_ai.py +33 -2
  33. huggingface_hub/inference/_providers/hf_inference.py +15 -7
  34. huggingface_hub/inference/_providers/publicai.py +6 -0
  35. huggingface_hub/inference/_providers/replicate.py +1 -1
  36. huggingface_hub/inference/_providers/scaleway.py +28 -0
  37. huggingface_hub/lfs.py +2 -4
  38. huggingface_hub/repocard.py +2 -1
  39. huggingface_hub/utils/_dotenv.py +24 -20
  40. huggingface_hub/utils/_git_credential.py +1 -1
  41. huggingface_hub/utils/_http.py +3 -5
  42. huggingface_hub/utils/_runtime.py +1 -0
  43. huggingface_hub/utils/_typing.py +24 -4
  44. huggingface_hub/utils/_xet_progress_reporting.py +31 -10
  45. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/METADATA +7 -4
  46. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/RECORD +50 -48
  47. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/LICENSE +0 -0
  48. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/WHEEL +0 -0
  49. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/entry_points.txt +0 -0
  50. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/top_level.txt +0 -0
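
To inspect the full changes rather than the excerpts reproduced below, the comparison can be rebuilt locally from the published wheels. A minimal sketch (directory names and the final diff command are arbitrary choices, not part of this report):

# Fetch both published wheels from PyPI and unpack them so the trees can be diffed.
import pathlib
import subprocess
import zipfile

for version in ("0.35.0rc0", "0.35.1"):
    dest = pathlib.Path("wheels") / version
    dest.mkdir(parents=True, exist_ok=True)
    # `pip download` retrieves the wheel without installing it.
    subprocess.run(
        ["pip", "download", f"huggingface_hub=={version}", "--no-deps", "-d", str(dest)],
        check=True,
    )
    wheel = next(dest.glob("*.whl"))  # a wheel is a zip archive
    zipfile.ZipFile(wheel).extractall(pathlib.Path("extracted") / version)

# Then, for example: git diff --no-index extracted/0.35.0rc0 extracted/0.35.1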
huggingface_hub/hf_file_system.py

@@ -896,7 +896,7 @@ class HfFileSystem(fsspec.AbstractFileSystem):
                 repo_type=resolve_remote_path.repo_type,
                 endpoint=self.endpoint,
             ),
-            temp_file=outfile,
+            temp_file=outfile,  # type: ignore[arg-type]
             displayed_filename=rpath,
             expected_size=expected_size,
             resume_size=0,
@@ -958,13 +958,7 @@ class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
             repo_type=self.resolved_path.repo_type,
             endpoint=self.fs.endpoint,
         )
-        r = http_backoff(
-            "GET",
-            url,
-            headers=headers,
-            retry_on_status_codes=(500, 502, 503, 504),
-            timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
-        )
+        r = http_backoff("GET", url, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT)
         hf_raise_for_status(r)
         return r.content
 
@@ -1063,12 +1057,12 @@ class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
                 "GET",
                 url,
                 headers=self.fs._api._build_hf_headers(),
-                retry_on_status_codes=(500, 502, 503, 504),
                 stream=True,
                 timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
             )
             hf_raise_for_status(self.response)
         try:
+            self.response.raw.decode_content = True
             out = self.response.raw.read(*read_args)
         except Exception:
             self.response.close()
@@ -1085,12 +1079,12 @@ class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
                 "GET",
                 url,
                 headers={"Range": "bytes=%d-" % self.loc, **self.fs._api._build_hf_headers()},
-                retry_on_status_codes=(500, 502, 503, 504),
                 stream=True,
                 timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
             )
             hf_raise_for_status(self.response)
         try:
+            self.response.raw.decode_content = True
             out = self.response.raw.read(*read_args)
         except Exception:
             self.response.close()
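
Both stream hunks above set `decode_content = True` on the underlying urllib3 response before calling `raw.read()`, so that compressed payloads are transparently decoded. A small illustration of the flag with plain `requests` (the URL is a placeholder; any endpoint that answers with `Content-Encoding: gzip` shows the difference):

import requests

url = "https://example.com/compressed.json"  # placeholder endpoint serving gzip-encoded content

r = requests.get(url, stream=True)
r.raw.decode_content = True   # same flag the hunks above enable on self.response.raw
chunk = r.raw.read(1024)      # decoded payload bytes; without the flag this may still be gzip data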
huggingface_hub/hub_mixin.py

@@ -266,12 +266,14 @@ class ModelHubMixin:
         if pipeline_tag is not None:
             info.model_card_data.pipeline_tag = pipeline_tag
         if tags is not None:
+            normalized_tags = list(tags)
             if info.model_card_data.tags is not None:
-                info.model_card_data.tags.extend(tags)
+                info.model_card_data.tags.extend(normalized_tags)
             else:
-                info.model_card_data.tags = tags
+                info.model_card_data.tags = normalized_tags
 
-        info.model_card_data.tags = sorted(set(info.model_card_data.tags))
+        if info.model_card_data.tags is not None:
+            info.model_card_data.tags = sorted(set(info.model_card_data.tags))
 
         # Handle encoders/decoders for args
         cls._hub_mixin_coders = coders or {}
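
With the normalization above, tags supplied at subclassing time can be any iterable and always end up as a sorted, de-duplicated list (or stay `None`). A hedged sketch of the resulting behaviour; the class, layer sizes, and tag names are made up, and the exact default tags merged in by the mixin are an assumption:

import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin

# Hypothetical model: tags passed as a tuple (with a duplicate) instead of a list.
class TinyModel(nn.Module, PyTorchModelHubMixin, tags=("demo", "custom-arch", "demo")):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(4, 4)

# The mixin stores card metadata on the class; duplicates are dropped and the list is sorted,
# with the mixin's own default tags (e.g. "pytorch_model_hub_mixin") merged in.
print(TinyModel._hub_mixin_info.model_card_data.tags)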
huggingface_hub/inference/_client.py

@@ -45,7 +45,6 @@ from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
-    ModelStatus,
     RequestParameters,
     _b64_encode,
     _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
     _bytes_to_list,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _open_as_binary,
     _set_unsupported_text_generation_kwargs,
     _stream_chat_completion_response,
     _stream_text_generation_response,
@@ -81,6 +79,7 @@ from huggingface_hub.inference._generated.types import (
     ImageSegmentationSubtask,
     ImageToImageTargetSize,
     ImageToTextOutput,
+    ImageToVideoTargetSize,
     ObjectDetectionOutputElement,
     Padding,
     QuestionAnsweringOutputElement,
@@ -104,7 +103,6 @@ from huggingface_hub.inference._generated.types import (
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
 from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method
 
 
 if TYPE_CHECKING:
@@ -132,7 +130,7 @@ class InferenceClient:
            Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
            arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
        provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"replicate"`, "sambanova"` or `"together"`.
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `publicai`, `"replicate"`, `"sambanova"`, `"scaleway"` or `"together"`.
            Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
            If model is a URL or `base_url` is passed, then `provider` is not used.
        token (`str`, *optional*):
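
The updated provider list above adds `publicai` and `scaleway`, matching the new `_providers/publicai.py` and `_providers/scaleway.py` modules in the file list. A minimal, hedged usage sketch (the model ID is illustrative; it must be one the chosen provider actually serves, and a token with Inference Providers access is required):

from huggingface_hub import InferenceClient

client = InferenceClient(provider="scaleway")  # or provider="publicai"
completion = client.chat.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model ID
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(completion.choices[0].message.content)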
@@ -258,21 +256,20 @@ class InferenceClient:
        if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
            request_parameters.headers["Accept"] = "image/png"
 
-        with _open_as_binary(request_parameters.data) as data_as_binary:
-            try:
-                response = get_session().post(
-                    request_parameters.url,
-                    json=request_parameters.json,
-                    data=data_as_binary,
-                    headers=request_parameters.headers,
-                    cookies=self.cookies,
-                    timeout=self.timeout,
-                    stream=stream,
-                    proxies=self.proxies,
-                )
-            except TimeoutError as error:
-                # Convert any `TimeoutError` to a `InferenceTimeoutError`
-                raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
+        try:
+            response = get_session().post(
+                request_parameters.url,
+                json=request_parameters.json,
+                data=request_parameters.data,
+                headers=request_parameters.headers,
+                cookies=self.cookies,
+                timeout=self.timeout,
+                stream=stream,
+                proxies=self.proxies,
+            )
+        except TimeoutError as error:
+            # Convert any `TimeoutError` to a `InferenceTimeoutError`
+            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
 
        try:
            hf_raise_for_status(response)
@@ -1339,6 +1336,85 @@ class InferenceClient:
        response = provider_helper.get_response(response, request_parameters)
        return _bytes_to_image(response)
 
+    def image_to_video(
+        self,
+        image: ContentT,
+        *,
+        model: Optional[str] = None,
+        prompt: Optional[str] = None,
+        negative_prompt: Optional[str] = None,
+        num_frames: Optional[float] = None,
+        num_inference_steps: Optional[int] = None,
+        guidance_scale: Optional[float] = None,
+        seed: Optional[int] = None,
+        target_size: Optional[ImageToVideoTargetSize] = None,
+        **kwargs,
+    ) -> bytes:
+        """
+        Generate a video from an input image.
+
+        Args:
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
+            model (`str`, *optional*):
+                The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+                Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+            prompt (`str`, *optional*):
+                The text prompt to guide the video generation.
+            negative_prompt (`str`, *optional*):
+                One prompt to guide what NOT to include in video generation.
+            num_frames (`float`, *optional*):
+                The num_frames parameter determines how many video frames are generated.
+            num_inference_steps (`int`, *optional*):
+                For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+                quality image at the expense of slower inference.
+            guidance_scale (`float`, *optional*):
+                For diffusion models. A higher guidance scale value encourages the model to generate videos closely
+                linked to the text prompt at the expense of lower image quality.
+            seed (`int`, *optional*):
+                The seed to use for the video generation.
+            target_size (`ImageToVideoTargetSize`, *optional*):
+                The size in pixel of the output video frames.
+            num_inference_steps (`int`, *optional*):
+                The number of denoising steps. More denoising steps usually lead to a higher quality video at the
+                expense of slower inference.
+            seed (`int`, *optional*):
+                Seed for the random number generator.
+
+        Returns:
+            `bytes`: The generated video.
+
+        Examples:
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient()
+        >>> video = client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
+        >>> with open("tiger.mp4", "wb") as f:
+        ...     f.write(video)
+        ```
+        """
+        model_id = model or self.model
+        provider_helper = get_provider_helper(self.provider, task="image-to-video", model=model_id)
+        request_parameters = provider_helper.prepare_request(
+            inputs=image,
+            parameters={
+                "prompt": prompt,
+                "negative_prompt": negative_prompt,
+                "num_frames": num_frames,
+                "num_inference_steps": num_inference_steps,
+                "guidance_scale": guidance_scale,
+                "seed": seed,
+                "target_size": target_size,
+                **kwargs,
+            },
+            headers=self.headers,
+            model=model_id,
+            api_key=self.token,
+        )
+        response = self._inner_post(request_parameters)
+        response = provider_helper.get_response(response, request_parameters)
+        return response
+
    def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
        """
        Takes an input image and return text.
@@ -1382,8 +1458,8 @@ class InferenceClient:
            api_key=self.token,
        )
        response = self._inner_post(request_parameters)
-        output = ImageToTextOutput.parse_obj(response)
-        return output[0] if isinstance(output, list) else output
+        output_list: List[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+        return output_list[0]
 
    def object_detection(
        self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
@@ -3193,101 +3269,6 @@ class InferenceClient:
        response = self._inner_post(request_parameters)
        return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)
 
-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
-        in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-        ```python
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-
-        # Discover zero-shot-classification models currently deployed
-        >>> models = client.list_deployed_models()
-        >>> models["zero-shot-classification"]
-        ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-        # List from only 1 framework
-        >>> client.list_deployed_models("text-generation-inference")
-        {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Model running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
    def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
        """
        Get information about the deployed endpoint.
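
The deprecation message on the removed `list_deployed_models` points to `HfApi.list_models(..., inference_provider='...')` as the replacement. A short sketch of that call (the filter values are examples):

from huggingface_hub import HfApi

api = HfApi()
# Warm text-generation models served by a given provider (example filters).
for model in api.list_models(inference_provider="hf-inference", pipeline_tag="text-generation", limit=10):
    print(model.id)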
@@ -3351,7 +3332,6 @@ class InferenceClient:
        Check the health of the deployed endpoint.
 
        Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.
 
        Args:
            model (`str`, *optional*):
@@ -3375,75 +3355,12 @@ class InferenceClient:
        if model is None:
            raise ValueError("Model id not provided.")
        if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
        url = model.rstrip("/") + "/health"
 
        response = get_session().get(url, headers=build_hf_headers(token=self.token))
        return response.status_code == 200
 
-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-        ```py
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-        >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
    @property
    def chat(self) -> "ProxyClientChat":
        return ProxyClientChat(self)
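
Likewise, the removed `get_model_status` points to `HfApi.model_info` as the replacement. A hedged sketch; the `expand=["inference"]` argument and the `inference` attribute are assumptions about current `HfApi` behaviour, not something stated in this diff:

from huggingface_hub import HfApi

api = HfApi()
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # expected to be "warm" or "cold" for models served by an inference provider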
huggingface_hub/inference/_common.py

@@ -19,7 +19,6 @@ import io
 import json
 import logging
 import mimetypes
-from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
 from typing import (
@@ -27,9 +26,7 @@ from typing import (
     Any,
     AsyncIterable,
     BinaryIO,
-    ContextManager,
     Dict,
-    Generator,
     Iterable,
     List,
     Literal,
@@ -61,8 +58,7 @@ if TYPE_CHECKING:
 # TYPES
 UrlT = str
 PathT = Union[str, Path]
-BinaryT = Union[bytes, BinaryIO]
-ContentT = Union[BinaryT, PathT, UrlT, "Image"]
+ContentT = Union[bytes, BinaryIO, PathT, UrlT, "Image", bytearray, memoryview]
 
 # Use to set a Accept: image/png header
 TASKS_EXPECTING_IMAGES = {"text-to-image", "image-to-image"}
@@ -76,39 +72,33 @@ class RequestParameters:
     task: str
     model: Optional[str]
     json: Optional[Union[str, Dict, List]]
-    data: Optional[ContentT]
+    data: Optional[bytes]
     headers: Dict[str, Any]
 
 
-# Add dataclass for ModelStatus. We use this dataclass in get_model_status function.
-@dataclass
-class ModelStatus:
+class MimeBytes(bytes):
     """
-    This Dataclass represents the model status in the HF Inference API.
-
-    Args:
-        loaded (`bool`):
-            If the model is currently loaded into HF's Inference API. Models
-            are loaded on-demand, leading to the user's first request taking longer.
-            If a model is loaded, you can be assured that it is in a healthy state.
-        state (`str`):
-            The current state of the model. This can be 'Loaded', 'Loadable', 'TooBig'.
-            If a model's state is 'Loadable', it's not too big and has a supported
-            backend. Loadable models are automatically loaded when the user first
-            requests inference on the endpoint. This means it is transparent for the
-            user to load a model, except that the first call takes longer to complete.
-        compute_type (`Dict`):
-            Information about the compute resource the model is using or will use, such as 'gpu' type and number of
-            replicas.
-        framework (`str`):
-            The name of the framework that the model was built with, such as 'transformers'
-            or 'text-generation-inference'.
+    A bytes object with a mime type.
+    To be returned by `_prepare_payload_open_as_mime_bytes` in subclasses.
+
+    Example:
+    ```python
+    >>> b = MimeBytes(b"hello", "text/plain")
+    >>> isinstance(b, bytes)
+    True
+    >>> b.mime_type
+    'text/plain'
+    ```
     """
 
-    loaded: bool
-    state: str
-    compute_type: Dict
-    framework: str
+    mime_type: Optional[str]
+
+    def __new__(cls, data: bytes, mime_type: Optional[str] = None):
+        obj = super().__new__(cls, data)
+        obj.mime_type = mime_type
+        if isinstance(data, MimeBytes) and mime_type is None:
+            obj.mime_type = data.mime_type
+        return obj
 
 
 ## IMPORT UTILS
@@ -148,31 +138,49 @@ def _import_pil_image():
 
 
 @overload
-def _open_as_binary(
-    content: ContentT,
-) -> ContextManager[BinaryT]: ... # means "if input is not None, output is not None"
+def _open_as_mime_bytes(content: ContentT) -> MimeBytes: ... # means "if input is not None, output is not None"
 
 
 @overload
-def _open_as_binary(
-    content: Literal[None],
-) -> ContextManager[Literal[None]]: ... # means "if input is None, output is None"
+def _open_as_mime_bytes(content: Literal[None]) -> Literal[None]: ... # means "if input is None, output is None"
 
 
-@contextmanager  # type: ignore
-def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT], None, None]:
+def _open_as_mime_bytes(content: Optional[ContentT]) -> Optional[MimeBytes]:
     """Open `content` as a binary file, either from a URL, a local path, raw bytes, or a PIL Image.
 
     Do nothing if `content` is None.
-
-    TODO: handle base64 as input
     """
+    # If content is None, yield None
+    if content is None:
+        return None
+
+    # If content is bytes, return it
+    if isinstance(content, bytes):
+        return MimeBytes(content)
+
+    # If content is raw binary data (bytearray, memoryview)
+    if isinstance(content, (bytearray, memoryview)):
+        return MimeBytes(bytes(content))
+
+    # If content is a binary file-like object
+    if hasattr(content, "read"):  # duck-typing instead of isinstance(content, BinaryIO)
+        logger.debug("Reading content from BinaryIO")
+        data = content.read()
+        mime_type = mimetypes.guess_type(content.name)[0] if hasattr(content, "name") else None
+        if isinstance(data, str):
+            raise TypeError("Expected binary stream (bytes), but got text stream")
+        return MimeBytes(data, mime_type=mime_type)
+
     # If content is a string => must be either a URL or a path
     if isinstance(content, str):
         if content.startswith("https://") or content.startswith("http://"):
             logger.debug(f"Downloading content from {content}")
-            yield get_session().get(content).content  # TODO: retrieve as stream and pipe to post request ?
-            return
+            response = get_session().get(content)
+            mime_type = response.headers.get("Content-Type")
+            if mime_type is None:
+                mime_type = mimetypes.guess_type(content)[0]
+            return MimeBytes(response.content, mime_type=mime_type)
+
         content = Path(content)
         if not content.exists():
             raise FileNotFoundError(
@@ -183,9 +191,7 @@ def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT],
     # If content is a Path => open it
     if isinstance(content, Path):
         logger.debug(f"Opening content from {content}")
-        with content.open("rb") as f:
-            yield f
-            return
+        return MimeBytes(content.read_bytes(), mime_type=mimetypes.guess_type(content)[0])
 
     # If content is a PIL Image => convert to bytes
     if is_pillow_available():
@@ -194,38 +200,37 @@ def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT],
         if isinstance(content, Image.Image):
             logger.debug("Converting PIL Image to bytes")
             buffer = io.BytesIO()
-            content.save(buffer, format=content.format or "PNG")
-            yield buffer.getvalue()
-            return
+            format = content.format or "PNG"
+            content.save(buffer, format=format)
+            return MimeBytes(buffer.getvalue(), mime_type=f"image/{format.lower()}")
 
-    # Otherwise: already a file-like object or None
-    yield content  # type: ignore
+    # If nothing matched, raise error
+    raise TypeError(
+        f"Unsupported content type: {type(content)}. "
+        "Expected one of: bytes, bytearray, BinaryIO, memoryview, Path, str (URL or file path), or PIL.Image.Image."
+    )
 
 
 def _b64_encode(content: ContentT) -> str:
     """Encode a raw file (image, audio) into base64. Can be bytes, an opened file, a path or a URL."""
-    with _open_as_binary(content) as data:
-        data_as_bytes = data if isinstance(data, bytes) else data.read()
-        return base64.b64encode(data_as_bytes).decode()
+    raw_bytes = _open_as_mime_bytes(content)
+    return base64.b64encode(raw_bytes).decode()
 
 
 def _as_url(content: ContentT, default_mime_type: str) -> str:
-    if isinstance(content, str) and (content.startswith("https://") or content.startswith("http://")):
+    if isinstance(content, str) and content.startswith(("http://", "https://", "data:")):
         return content
 
-    # Handle MIME type detection for different content types
-    mime_type = None
-    if isinstance(content, (str, Path)):
-        mime_type = mimetypes.guess_type(content, strict=False)[0]
-    elif is_pillow_available():
-        from PIL import Image
+    # Convert content to bytes
+    raw_bytes = _open_as_mime_bytes(content)
 
-        if isinstance(content, Image.Image):
-            # Determine MIME type from PIL Image format, in sync with `_open_as_binary`
-            mime_type = f"image/{(content.format or 'PNG').lower()}"
+    # Get MIME type
+    mime_type = raw_bytes.mime_type or default_mime_type
 
-    mime_type = mime_type or default_mime_type
-    encoded_data = _b64_encode(content)
+    # Encode content to base64
+    encoded_data = base64.b64encode(raw_bytes).decode()
+
+    # Build data URL
     return f"data:{mime_type};base64,{encoded_data}"
 
 
@@ -270,9 +275,6 @@ def _as_dict(response: Union[bytes, Dict]) -> Dict:
     return json.loads(response) if isinstance(response, bytes) else response
 
 
-## PAYLOAD UTILS
-
-
 ## STREAMING UTILS
 
 
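
As a closing note on the `_common.py` changes: the rewritten helpers return an eagerly-read `MimeBytes` instead of a context-managed stream, which lets `_as_url` reuse the detected MIME type. A small sketch of the new internal behaviour (these are private helpers, so names and import paths may change without notice; a local `cat.png` is assumed to exist):

from pathlib import Path

from huggingface_hub.inference._common import _as_url, _open_as_mime_bytes

data = _open_as_mime_bytes(Path("cat.png"))      # assumed local file
print(type(data).__name__, data.mime_type)       # MimeBytes image/png

url = _as_url(Path("cat.png"), default_mime_type="image/jpeg")
print(url[:22])                                  # data:image/png;base64,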