huggingface-hub 0.30.1__py3-none-any.whl → 0.31.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. huggingface_hub/__init__.py +1 -1
  2. huggingface_hub/_commit_api.py +23 -4
  3. huggingface_hub/_inference_endpoints.py +8 -5
  4. huggingface_hub/_snapshot_download.py +2 -1
  5. huggingface_hub/_space_api.py +0 -5
  6. huggingface_hub/_upload_large_folder.py +26 -3
  7. huggingface_hub/commands/upload.py +2 -1
  8. huggingface_hub/constants.py +1 -0
  9. huggingface_hub/file_download.py +58 -10
  10. huggingface_hub/hf_api.py +81 -15
  11. huggingface_hub/inference/_client.py +105 -150
  12. huggingface_hub/inference/_generated/_async_client.py +105 -150
  13. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +2 -3
  14. huggingface_hub/inference/_generated/types/chat_completion.py +3 -3
  15. huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
  16. huggingface_hub/inference/_generated/types/text_generation.py +1 -1
  17. huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
  18. huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
  19. huggingface_hub/inference/_providers/__init__.py +55 -17
  20. huggingface_hub/inference/_providers/_common.py +34 -19
  21. huggingface_hub/inference/_providers/black_forest_labs.py +4 -1
  22. huggingface_hub/inference/_providers/fal_ai.py +36 -11
  23. huggingface_hub/inference/_providers/hf_inference.py +33 -11
  24. huggingface_hub/inference/_providers/hyperbolic.py +5 -1
  25. huggingface_hub/inference/_providers/nebius.py +15 -1
  26. huggingface_hub/inference/_providers/novita.py +14 -1
  27. huggingface_hub/inference/_providers/openai.py +3 -2
  28. huggingface_hub/inference/_providers/replicate.py +22 -3
  29. huggingface_hub/inference/_providers/sambanova.py +23 -1
  30. huggingface_hub/inference/_providers/together.py +15 -1
  31. huggingface_hub/repocard_data.py +24 -4
  32. huggingface_hub/utils/_pagination.py +2 -2
  33. huggingface_hub/utils/_runtime.py +4 -0
  34. huggingface_hub/utils/_xet.py +1 -12
  35. {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/METADATA +3 -2
  36. {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/RECORD +40 -40
  37. {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/LICENSE +0 -0
  38. {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/WHEEL +0 -0
  39. {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/entry_points.txt +0 -0
  40. {huggingface_hub-0.30.1.dist-info → huggingface_hub-0.31.0rc0.dist-info}/top_level.txt +0 -0
@@ -100,7 +100,7 @@ from huggingface_hub.inference._generated.types import (
  ZeroShotClassificationOutputElement,
  ZeroShotImageClassificationOutputElement,
  )
- from huggingface_hub.inference._providers import PROVIDER_T, HFInferenceTask, get_provider_helper
+ from huggingface_hub.inference._providers import PROVIDER_T, get_provider_helper
  from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
  from huggingface_hub.utils._auth import get_token
  from huggingface_hub.utils._deprecation import _deprecate_method
@@ -134,15 +134,14 @@ class InferenceClient:
  documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
  provider (`str`, *optional*):
  Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, "sambanova"` or `"together"`.
- defaults to hf-inference (Hugging Face Serverless Inference API).
+ Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
  If model is a URL or `base_url` is passed, then `provider` is not used.
  token (`str`, *optional*):
  Hugging Face token. Will default to the locally saved token if not provided.
  Note: for better compatibility with OpenAI's client, `token` has been aliased as `api_key`. Those 2
  arguments are mutually exclusive and have the exact same behavior.
  timeout (`float`, `optional`):
- The maximum number of seconds to wait for a response from the server. Loading a new model in Inference
- API can take up to several minutes. Defaults to None, meaning it will loop until the server is available.
+ The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
  headers (`Dict[str, str]`, `optional`):
  Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
  Values in this dictionary will override the default values.
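The docstring hunk above captures the headline behavioural change of this release: when no `provider` is given, the client now resolves it as `"auto"` (the first provider enabled for the model in the user's settings) instead of always falling back to `"hf-inference"`. A minimal sketch of what this looks like on the caller side, with a placeholder model id:

```python
from huggingface_hub import InferenceClient

# 0.31.0: no provider given -> behaves like provider="auto", i.e. the first provider
# enabled for this model in https://hf.co/settings/inference-providers.
client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct")  # placeholder model id

# Pinning a provider explicitly keeps the 0.30.x behaviour.
pinned = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct", provider="hf-inference")
```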
@@ -165,7 +164,7 @@ class InferenceClient:
  self,
  model: Optional[str] = None,
  *,
- provider: Optional[PROVIDER_T] = None,
+ provider: Union[Literal["auto"], PROVIDER_T, None] = None,
  token: Optional[str] = None,
  timeout: Optional[float] = None,
  headers: Optional[Dict[str, str]] = None,
@@ -228,7 +227,7 @@ class InferenceClient:
  )

  # Configure provider
- self.provider = provider if provider is not None else "hf-inference"
+ self.provider = provider

  self.cookies = cookies
  self.timeout = timeout
@@ -237,83 +236,6 @@ class InferenceClient:
  def __repr__(self):
  return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

- @overload
- def post( # type: ignore[misc]
- self,
- *,
- json: Optional[Union[str, Dict, List]] = None,
- data: Optional[ContentT] = None,
- model: Optional[str] = None,
- task: Optional[str] = None,
- stream: Literal[False] = ...,
- ) -> bytes: ...
-
- @overload
- def post( # type: ignore[misc]
- self,
- *,
- json: Optional[Union[str, Dict, List]] = None,
- data: Optional[ContentT] = None,
- model: Optional[str] = None,
- task: Optional[str] = None,
- stream: Literal[True] = ...,
- ) -> Iterable[bytes]: ...
-
- @overload
- def post(
- self,
- *,
- json: Optional[Union[str, Dict, List]] = None,
- data: Optional[ContentT] = None,
- model: Optional[str] = None,
- task: Optional[str] = None,
- stream: bool = False,
- ) -> Union[bytes, Iterable[bytes]]: ...
-
- @_deprecate_method(
- version="0.31.0",
- message=(
- "Making direct POST requests to the inference server is not supported anymore. "
- "Please use task methods instead (e.g. `InferenceClient.chat_completion`). "
- "If your use case is not supported, please open an issue in https://github.com/huggingface/huggingface_hub."
- ),
- )
- def post(
- self,
- *,
- json: Optional[Union[str, Dict, List]] = None,
- data: Optional[ContentT] = None,
- model: Optional[str] = None,
- task: Optional[str] = None,
- stream: bool = False,
- ) -> Union[bytes, Iterable[bytes]]:
- """
- Make a POST request to the inference server.
-
- This method is deprecated and will be removed in the future.
- Please use task methods instead (e.g. `InferenceClient.chat_completion`).
- """
- if self.provider != "hf-inference":
- raise ValueError(
- "Cannot use `post` with another provider than `hf-inference`. "
- "`InferenceClient.post` is deprecated and should not be used directly anymore."
- )
- provider_helper = HFInferenceTask(task or "unknown")
- mapped_model = provider_helper._prepare_mapped_model(model or self.model)
- url = provider_helper._prepare_url(self.token, mapped_model) # type: ignore[arg-type]
- headers = provider_helper._prepare_headers(self.headers, self.token) # type: ignore[arg-type]
- return self._inner_post(
- request_parameters=RequestParameters(
- url=url,
- task=task or "unknown",
- model=model or "unknown",
- json=json,
- data=data,
- headers=headers,
- ),
- stream=stream,
- )
-
  @overload
  def _inner_post( # type: ignore[misc]
  self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
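The hunk above removes the `InferenceClient.post` helper that had been deprecated in favour of the task methods; the removed deprecation message already named the replacement. A rough migration sketch, assuming a simple text-generation use case (model id and inputs are placeholders):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# Before (removed in 0.31.0): raw POST against the inference server.
# raw_bytes = client.post(json={"inputs": "Hello"}, model="gpt2", task="text-generation")

# After: call the dedicated task method instead.
generated = client.text_generation("Hello", model="gpt2")
```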
@@ -409,12 +331,13 @@ class InferenceClient:
  ]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="audio-classification")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="audio-classification", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=audio,
  parameters={"function_to_apply": function_to_apply, "top_k": top_k},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -457,12 +380,13 @@ class InferenceClient:
  f.write(item.blob)
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="audio-to-audio")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="audio-to-audio", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=audio,
  parameters={},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -507,12 +431,13 @@ class InferenceClient:
  "hello world"
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=audio,
  parameters={**(extra_body or {})},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -951,15 +876,21 @@ class InferenceClient:
  '{\n\n"activity": "bike ride",\n"animals": ["puppy", "cat", "raccoon"],\n"animals_seen": 3,\n"location": "park"}'
  ```
  """
- # Get the provider helper
- provider_helper = get_provider_helper(self.provider, task="conversational")
-
  # Since `chat_completion(..., model=xxx)` is also a payload parameter for the server, we need to handle 'model' differently.
  # `self.model` takes precedence over 'model' argument for building URL.
  # `model` takes precedence for payload value.
  model_id_or_url = self.model or model
  payload_model = model or self.model

+ # Get the provider helper
+ provider_helper = get_provider_helper(
+ self.provider,
+ task="conversational",
+ model=model_id_or_url
+ if model_id_or_url is not None and model_id_or_url.startswith(("http://", "https://"))
+ else payload_model,
+ )
+
  # Prepare the payload
  parameters = {
  "model": payload_model,
@@ -1061,8 +992,9 @@ class InferenceClient:
  [DocumentQuestionAnsweringOutputElement(answer='us-001', end=16, score=0.9999666213989258, start=16)]
  ```
  """
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
  inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
- provider_helper = get_provider_helper(self.provider, task="document-question-answering")
  request_parameters = provider_helper.prepare_request(
  inputs=inputs,
  parameters={
@@ -1076,7 +1008,7 @@ class InferenceClient:
  "word_boxes": word_boxes,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1099,8 +1031,8 @@ class InferenceClient:
  text (`str`):
  The text to embed.
  model (`str`, *optional*):
- The model to use for the conversational task. Can be a model ID hosted on the Hugging Face Hub or a URL to
- a deployed Inference Endpoint. If not provided, the default recommended conversational model will be used.
+ The model to use for the feature extraction task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended feature extraction model will be used.
  Defaults to None.
  normalize (`bool`, *optional*):
  Whether to normalize the embeddings or not.
@@ -1137,7 +1069,8 @@ class InferenceClient:
  [ 0.28552425, -0.928395 , -1.2077185 , ..., 0.76810825, -2.1069427 , 0.6236161 ]], dtype=float32)
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="feature-extraction")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="feature-extraction", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=text,
  parameters={
@@ -1147,12 +1080,12 @@ class InferenceClient:
  "truncation_direction": truncation_direction,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
  np = _import_numpy()
- return np.array(_bytes_to_dict(response), dtype="float32")
+ return np.array(provider_helper.get_response(response), dtype="float32")

  def fill_mask(
  self,
@@ -1198,12 +1131,13 @@ class InferenceClient:
  ]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="fill-mask")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="fill-mask", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=text,
  parameters={"targets": targets, "top_k": top_k},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1247,12 +1181,13 @@ class InferenceClient:
  [ImageClassificationOutputElement(label='Blenheim spaniel', score=0.9779096841812134), ...]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="image-classification")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="image-classification", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=image,
  parameters={"function_to_apply": function_to_apply, "top_k": top_k},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1308,7 +1243,8 @@ class InferenceClient:
  [ImageSegmentationOutputElement(score=0.989008, label='LABEL_184', mask=<PIL.PngImagePlugin.PngImageFile image mode=L size=400x300 at 0x7FDD2B129CC0>), ...]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="image-segmentation")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="image-segmentation", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=image,
  parameters={
@@ -1318,7 +1254,7 @@ class InferenceClient:
  "threshold": threshold,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1384,7 +1320,8 @@ class InferenceClient:
  >>> image.save("tiger.jpg")
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="image-to-image")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="image-to-image", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=image,
  parameters={
@@ -1396,7 +1333,7 @@ class InferenceClient:
  **kwargs,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1435,12 +1372,13 @@ class InferenceClient:
  'a dog laying on the grass next to a flower pot '
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="image-to-text")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="image-to-text", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=image,
  parameters={},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1486,12 +1424,13 @@ class InferenceClient:
  [ObjectDetectionOutputElement(score=0.9486683011054993, label='person', box=ObjectDetectionBoundingBox(xmin=59, ymin=39, xmax=420, ymax=510)), ...]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="object-detection")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="object-detection", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=image,
  parameters={"threshold": threshold},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1559,7 +1498,8 @@ class InferenceClient:
  QuestionAnsweringOutputElement(answer='Clara', end=16, score=0.9326565265655518, start=11)
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="question-answering")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="question-answering", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=None,
  parameters={
@@ -1573,7 +1513,7 @@ class InferenceClient:
  },
  extra_payload={"question": question, "context": context},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1593,8 +1533,8 @@ class InferenceClient:
  other_sentences (`List[str]`):
  The list of sentences to compare to.
  model (`str`, *optional*):
- The model to use for the conversational task. Can be a model ID hosted on the Hugging Face Hub or a URL to
- a deployed Inference Endpoint. If not provided, the default recommended conversational model will be used.
+ The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+ a deployed Inference Endpoint. If not provided, the default recommended sentence similarity model will be used.
  Defaults to None.

  Returns:
@@ -1621,13 +1561,14 @@ class InferenceClient:
  [0.7785726189613342, 0.45876261591911316, 0.2906220555305481]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="sentence-similarity")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="sentence-similarity", model=model_id)
  request_parameters = provider_helper.prepare_request(
- inputs=None,
+ inputs={"source_sentence": sentence, "sentences": other_sentences},
  parameters={},
- extra_payload={"source_sentence": sentence, "sentences": other_sentences},
+ extra_payload={},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1679,12 +1620,13 @@ class InferenceClient:
  "generate_parameters": generate_parameters,
  "truncation": truncation,
  }
- provider_helper = get_provider_helper(self.provider, task="summarization")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="summarization", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=text,
  parameters=parameters,
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1740,13 +1682,14 @@ class InferenceClient:
  TableQuestionAnsweringOutputElement(answer='36542', coordinates=[[0, 1]], cells=['36542'], aggregator='AVERAGE')
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="table-question-answering")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="table-question-answering", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=None,
  parameters={"model": model, "padding": padding, "sequential": sequential, "truncation": truncation},
  extra_payload={"query": query, "table": table},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1794,13 +1737,14 @@ class InferenceClient:
  ["5", "5", "5"]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="tabular-classification")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="tabular-classification", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=None,
  extra_payload={"table": table},
  parameters={},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1843,13 +1787,14 @@ class InferenceClient:
  [110, 120, 130]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="tabular-regression")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="tabular-regression", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=None,
  parameters={},
  extra_payload={"table": table},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -1898,7 +1843,8 @@ class InferenceClient:
  ]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="text-classification")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="text-classification", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=text,
  parameters={
@@ -1906,7 +1852,7 @@ class InferenceClient:
  "top_k": top_k,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -2347,13 +2293,14 @@ class InferenceClient:
  " Please pass `stream=False` as input."
  )

- provider_helper = get_provider_helper(self.provider, task="text-generation")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="text-generation", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=prompt,
  parameters=parameters,
  extra_payload={"stream": stream},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )

@@ -2369,7 +2316,7 @@ class InferenceClient:
  prompt=prompt,
  details=details,
  stream=stream,
- model=model or self.model,
+ model=model_id,
  adapter_id=adapter_id,
  best_of=best_of,
  decoder_input_details=decoder_input_details,
@@ -2400,8 +2347,8 @@ class InferenceClient:
  # Data can be a single element (dict) or an iterable of dicts where we select the first element of.
  if isinstance(data, list):
  data = data[0]
-
- return TextGenerationOutput.parse_obj_as_instance(data) if details else data["generated_text"]
+ response = provider_helper.get_response(data, request_parameters)
+ return TextGenerationOutput.parse_obj_as_instance(response) if details else response["generated_text"]

  def text_to_image(
  self,
@@ -2524,7 +2471,8 @@ class InferenceClient:
  >>> image.save("astronaut.png")
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="text-to-image")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="text-to-image", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=prompt,
  parameters={
@@ -2538,7 +2486,7 @@ class InferenceClient:
  **(extra_body or {}),
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -2622,7 +2570,8 @@ class InferenceClient:
  ... file.write(video)
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="text-to-video")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="text-to-video", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=prompt,
  parameters={
@@ -2634,7 +2583,7 @@ class InferenceClient:
  **(extra_body or {}),
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -2819,7 +2768,8 @@ class InferenceClient:
  ... f.write(audio)
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="text-to-speech")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="text-to-speech", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=text,
  parameters={
@@ -2842,7 +2792,7 @@ class InferenceClient:
  **(extra_body or {}),
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -2908,7 +2858,8 @@ class InferenceClient:
  ]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="token-classification")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="token-classification", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=text,
  parameters={
@@ -2917,7 +2868,7 @@ class InferenceClient:
  "stride": stride,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -2994,7 +2945,8 @@ class InferenceClient:
  if src_lang is None and tgt_lang is not None:
  raise ValueError("You cannot specify `tgt_lang` without specifying `src_lang`.")

- provider_helper = get_provider_helper(self.provider, task="translation")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="translation", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=text,
  parameters={
@@ -3005,7 +2957,7 @@ class InferenceClient:
  "generate_parameters": generate_parameters,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -3057,12 +3009,13 @@ class InferenceClient:
  ]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="visual-question-answering")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="visual-question-answering", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=image,
  parameters={"top_k": top_k},
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  extra_payload={"question": question, "image": _b64_encode(image)},
  )
@@ -3155,7 +3108,8 @@ class InferenceClient:
  ]
  ```
  """
- provider_helper = get_provider_helper(self.provider, task="zero-shot-classification")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="zero-shot-classification", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=text,
  parameters={
@@ -3164,7 +3118,7 @@ class InferenceClient:
  "hypothesis_template": hypothesis_template,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
@@ -3226,7 +3180,8 @@ class InferenceClient:
  if len(candidate_labels) < 2:
  raise ValueError("You must specify at least 2 classes to compare.")

- provider_helper = get_provider_helper(self.provider, task="zero-shot-image-classification")
+ model_id = model or self.model
+ provider_helper = get_provider_helper(self.provider, task="zero-shot-image-classification", model=model_id)
  request_parameters = provider_helper.prepare_request(
  inputs=image,
  parameters={
@@ -3234,7 +3189,7 @@ class InferenceClient:
  "hypothesis_template": hypothesis_template,
  },
  headers=self.headers,
- model=model or self.model,
+ model=model_id,
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)