huggingface-hub 0.24.0__tar.gz → 0.24.1__tar.gz

This diff shows the changes between the publicly released 0.24.0 and 0.24.1 versions of huggingface-hub, as they appear in their public registry. It is provided for informational purposes only.

This version of huggingface-hub has been flagged as potentially problematic.

Files changed (122):
  1. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/PKG-INFO +1 -1
  2. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/__init__.py +1 -1
  3. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_client.py +45 -120
  4. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_common.py +25 -61
  5. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/_async_client.py +45 -120
  6. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub.egg-info/PKG-INFO +1 -1
  7. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/LICENSE +0 -0
  8. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/MANIFEST.in +0 -0
  9. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/README.md +0 -0
  10. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/pyproject.toml +0 -0
  11. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/setup.cfg +0 -0
  12. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/setup.py +0 -0
  13. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_commit_api.py +0 -0
  14. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_commit_scheduler.py +0 -0
  15. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_inference_endpoints.py +0 -0
  16. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_local_folder.py +0 -0
  17. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_login.py +0 -0
  18. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_multi_commits.py +0 -0
  19. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_snapshot_download.py +0 -0
  20. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_space_api.py +0 -0
  21. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_tensorboard_logger.py +0 -0
  22. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_webhooks_payload.py +0 -0
  23. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/_webhooks_server.py +0 -0
  24. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/__init__.py +0 -0
  25. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/_cli_utils.py +0 -0
  26. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/delete_cache.py +0 -0
  27. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/download.py +0 -0
  28. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/env.py +0 -0
  29. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/huggingface_cli.py +0 -0
  30. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/lfs.py +0 -0
  31. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/repo_files.py +0 -0
  32. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/scan_cache.py +0 -0
  33. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/tag.py +0 -0
  34. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/upload.py +0 -0
  35. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/commands/user.py +0 -0
  36. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/community.py +0 -0
  37. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/constants.py +0 -0
  38. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/errors.py +0 -0
  39. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/fastai_utils.py +0 -0
  40. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/file_download.py +0 -0
  41. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/hf_api.py +0 -0
  42. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/hf_file_system.py +0 -0
  43. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/hub_mixin.py +0 -0
  44. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/__init__.py +0 -0
  45. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/__init__.py +0 -0
  46. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/__init__.py +0 -0
  47. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/audio_classification.py +0 -0
  48. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/audio_to_audio.py +0 -0
  49. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +0 -0
  50. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/base.py +0 -0
  51. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/chat_completion.py +0 -0
  52. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/depth_estimation.py +0 -0
  53. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/document_question_answering.py +0 -0
  54. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/feature_extraction.py +0 -0
  55. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/fill_mask.py +0 -0
  56. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/image_classification.py +0 -0
  57. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/image_segmentation.py +0 -0
  58. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/image_to_image.py +0 -0
  59. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/image_to_text.py +0 -0
  60. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/object_detection.py +0 -0
  61. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/question_answering.py +0 -0
  62. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/sentence_similarity.py +0 -0
  63. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/summarization.py +0 -0
  64. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/table_question_answering.py +0 -0
  65. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/text2text_generation.py +0 -0
  66. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/text_classification.py +0 -0
  67. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/text_generation.py +0 -0
  68. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/text_to_audio.py +0 -0
  69. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/text_to_image.py +0 -0
  70. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/token_classification.py +0 -0
  71. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/translation.py +0 -0
  72. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/video_classification.py +0 -0
  73. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/visual_question_answering.py +0 -0
  74. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/zero_shot_classification.py +0 -0
  75. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +0 -0
  76. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +0 -0
  77. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_templating.py +0 -0
  78. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference/_types.py +0 -0
  79. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/inference_api.py +0 -0
  80. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/keras_mixin.py +0 -0
  81. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/lfs.py +0 -0
  82. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/repocard.py +0 -0
  83. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/repocard_data.py +0 -0
  84. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/repository.py +0 -0
  85. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/serialization/__init__.py +0 -0
  86. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/serialization/_base.py +0 -0
  87. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/serialization/_tensorflow.py +0 -0
  88. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/serialization/_torch.py +0 -0
  89. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/templates/datasetcard_template.md +0 -0
  90. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/templates/modelcard_template.md +0 -0
  91. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/__init__.py +0 -0
  92. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_cache_assets.py +0 -0
  93. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_cache_manager.py +0 -0
  94. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_chunk_utils.py +0 -0
  95. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_datetime.py +0 -0
  96. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_deprecation.py +0 -0
  97. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_errors.py +0 -0
  98. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_experimental.py +0 -0
  99. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_fixes.py +0 -0
  100. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_git_credential.py +0 -0
  101. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_headers.py +0 -0
  102. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_hf_folder.py +0 -0
  103. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_http.py +0 -0
  104. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_pagination.py +0 -0
  105. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_paths.py +0 -0
  106. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_runtime.py +0 -0
  107. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_safetensors.py +0 -0
  108. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_subprocess.py +0 -0
  109. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_telemetry.py +0 -0
  110. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_token.py +0 -0
  111. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_typing.py +0 -0
  112. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/_validators.py +0 -0
  113. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/endpoint_helpers.py +0 -0
  114. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/insecure_hashlib.py +0 -0
  115. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/logging.py +0 -0
  116. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/sha.py +0 -0
  117. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub/utils/tqdm.py +0 -0
  118. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub.egg-info/SOURCES.txt +0 -0
  119. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub.egg-info/dependency_links.txt +0 -0
  120. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub.egg-info/entry_points.txt +0 -0
  121. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub.egg-info/requires.txt +0 -0
  122. {huggingface_hub-0.24.0 → huggingface_hub-0.24.1}/src/huggingface_hub.egg-info/top_level.txt +0 -0
--- huggingface_hub-0.24.0/PKG-INFO
+++ huggingface_hub-0.24.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: huggingface_hub
-Version: 0.24.0
+Version: 0.24.1
 Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
 Home-page: https://github.com/huggingface/huggingface_hub
 Author: Hugging Face, Inc.
--- huggingface_hub-0.24.0/src/huggingface_hub/__init__.py
+++ huggingface_hub-0.24.1/src/huggingface_hub/__init__.py
@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING


-__version__ = "0.24.0"
+__version__ = "0.24.1"

 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
--- huggingface_hub-0.24.0/src/huggingface_hub/inference/_client.py
+++ huggingface_hub-0.24.1/src/huggingface_hub/inference/_client.py
@@ -66,11 +66,9 @@ from huggingface_hub.inference._common import (
     _fetch_recommended_models,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _is_chat_completion_server,
     _open_as_binary,
-    _set_as_non_chat_completion_server,
     _set_unsupported_text_generation_kwargs,
-    _stream_chat_completion_response_from_bytes,
+    _stream_chat_completion_response,
     _stream_text_generation_response,
     raise_text_generation_error,
 )
@@ -82,8 +80,6 @@ from huggingface_hub.inference._generated.types import (
     ChatCompletionInputTool,
     ChatCompletionInputToolTypeClass,
     ChatCompletionOutput,
-    ChatCompletionOutputComplete,
-    ChatCompletionOutputMessage,
     ChatCompletionStreamOutput,
     DocumentQuestionAnsweringOutputElement,
     FillMaskOutputElement,
@@ -189,7 +185,7 @@ class InferenceClient:
         )

         self.model: Optional[str] = model
-        self.token: Union[str, bool, None] = token or api_key
+        self.token: Union[str, bool, None] = token if token is not None else api_key
         self.headers = CaseInsensitiveDict(build_hf_headers(token=self.token))  # 'authorization' + 'user-agent'
         if headers is not None:
             self.headers.update(headers)
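
The one-line `token` change above is a truthiness fix: `token=False` is a documented way to tell the client to send no authentication token, but `token or api_key` collapses `False` into the `api_key` fallback (usually `None`), and a `None` token later falls back to the locally saved token. A minimal sketch of the difference, in plain Python with no Hub access:

token, api_key = False, None  # as in InferenceClient(token=False): explicitly disable auth

old = token or api_key                         # 0.24.0 => None ("no preference")
new = token if token is not None else api_key  # 0.24.1 => False ("send no token")

assert old is None   # None later falls back to the locally saved token, so auth is sent anyway
assert new is False  # the explicit opt-out survives
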
@@ -818,123 +814,52 @@ class InferenceClient:
         # since `chat_completion(..., model=xxx)` is also a payload parameter for the
         # server, we need to handle it differently
         model = self.base_url or self.model or model or self.get_recommended_model("text-generation")
+        is_url = model.startswith(("http://", "https://"))
+
+        # First, resolve the model chat completions URL
+        if model == self.base_url:
+            # base_url passed => add server route
+            model_url = model + "/v1/chat/completions"
+        elif is_url:
+            # model is a URL => use it directly
+            model_url = model
+        else:
+            # model is a model ID => resolve it + add server route
+            model_url = self._resolve_url(model) + "/v1/chat/completions"
+
+        # `model` is sent in the payload. Not used by the server but can be useful for debugging/routing.
+        # If it's a ID on the Hub => use it. Otherwise, we use a random string.
+        model_id = model if not is_url and model.count("/") == 1 else "tgi"
+
+        data = self.post(
+            model=model_url,
+            json=dict(
+                model=model_id,
+                messages=messages,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                logprobs=logprobs,
+                max_tokens=max_tokens,
+                n=n,
+                presence_penalty=presence_penalty,
+                response_format=response_format,
+                seed=seed,
+                stop=stop,
+                temperature=temperature,
+                tool_choice=tool_choice,
+                tool_prompt=tool_prompt,
+                tools=tools,
+                top_logprobs=top_logprobs,
+                top_p=top_p,
+                stream=stream,
+            ),
+            stream=stream,
+        )

-        if _is_chat_completion_server(model):
-            # First, let's consider the server has a `/v1/chat/completions` endpoint.
-            # If that's the case, we don't have to render the chat template client-side.
-            model_url = self._resolve_url(model)
-            if not model_url.endswith("/chat/completions"):
-                model_url += "/v1/chat/completions"
-
-            # `model` is sent in the payload. Not used by the server but can be useful for debugging/routing.
-            if not model.startswith("http") and model.count("/") == 1:
-                # If it's a ID on the Hub => use it
-                model_id = model
-            else:
-                # Otherwise, we use a random string
-                model_id = "tgi"
-
-            try:
-                data = self.post(
-                    model=model_url,
-                    json=dict(
-                        model=model_id,
-                        messages=messages,
-                        frequency_penalty=frequency_penalty,
-                        logit_bias=logit_bias,
-                        logprobs=logprobs,
-                        max_tokens=max_tokens,
-                        n=n,
-                        presence_penalty=presence_penalty,
-                        response_format=response_format,
-                        seed=seed,
-                        stop=stop,
-                        temperature=temperature,
-                        tool_choice=tool_choice,
-                        tool_prompt=tool_prompt,
-                        tools=tools,
-                        top_logprobs=top_logprobs,
-                        top_p=top_p,
-                        stream=stream,
-                    ),
-                    stream=stream,
-                )
-            except HTTPError as e:
-                if e.response.status_code in (400, 404, 500):
-                    # Let's consider the server is not a chat completion server.
-                    # Then we call again `chat_completion` which will render the chat template client side.
-                    # (can be HTTP 500, HTTP 400, HTTP 404 depending on the server)
-                    _set_as_non_chat_completion_server(model)
-                    logger.warning(
-                        f"Server {model_url} does not seem to support chat completion. Falling back to text generation. Error: {e}"
-                    )
-                    return self.chat_completion(
-                        messages=messages,
-                        model=model,
-                        stream=stream,
-                        max_tokens=max_tokens,
-                        seed=seed,
-                        stop=stop,
-                        temperature=temperature,
-                        top_p=top_p,
-                    )
-                raise
-
-            if stream:
-                return _stream_chat_completion_response_from_bytes(data)  # type: ignore[arg-type]
-
-            return ChatCompletionOutput.parse_obj_as_instance(data)  # type: ignore[arg-type]
-
-        # At this point, we know the server is not a chat completion server.
-        # It means it's a transformers-backed server for which we can send a list of messages directly to the
-        # `text-generation` pipeline. We won't receive a detailed response but only the generated text.
         if stream:
-            raise ValueError(
-                "Streaming token is not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. Please pass `stream=False` as input."
-            )
-        if tool_choice is not None or tool_prompt is not None or tools is not None:
-            warnings.warn(
-                "Tools are not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. The provided tool parameters will be ignored."
-            )
-        if response_format is not None:
-            warnings.warn(
-                "Response format is not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. The provided response format will be ignored."
-            )
-
-        # generate response
-        text_generation_output = self.text_generation(
-            prompt=messages,  # type: ignore # Not correct type but works implicitly
-            model=model,
-            stream=False,
-            details=False,
-            max_new_tokens=max_tokens,
-            seed=seed,
-            stop_sequences=stop,
-            temperature=temperature,
-            top_p=top_p,
-        )
+            return _stream_chat_completion_response(data)  # type: ignore[arg-type]

-        # Format as a ChatCompletionOutput with dummy values for fields we can't provide
-        return ChatCompletionOutput(
-            id="dummy",
-            model="dummy",
-            system_fingerprint="dummy",
-            usage=None,  # type: ignore # set to `None` as we don't want to provide false information
-            created=int(time.time()),
-            choices=[
-                ChatCompletionOutputComplete(
-                    finish_reason="unk",  # type: ignore # set to `unk` as we don't want to provide false information
-                    index=0,
-                    message=ChatCompletionOutputMessage(
-                        content=text_generation_output,
-                        role="assistant",
-                    ),
-                )
-            ],
-        )
+        return ChatCompletionOutput.parse_obj_as_instance(data)  # type: ignore[arg-type]

     def conversational(
         self,
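
The rewritten `chat_completion` above no longer probes the server and falls back to client-side chat templating on failure; it always targets an OpenAI-compatible `/v1/chat/completions` route. A standalone sketch of the new URL resolution (the helper name `resolve_chat_url` and the `resolve_model_id` callback are hypothetical; in the real client this logic lives inline and uses `self._resolve_url`):

from typing import Callable, Optional, Tuple

def resolve_chat_url(
    model: str,
    base_url: Optional[str],
    resolve_model_id: Callable[[str], str],
) -> Tuple[str, str]:
    # Mirrors the three branches in InferenceClient.chat_completion (0.24.1)
    is_url = model.startswith(("http://", "https://"))
    if model == base_url:
        model_url = model + "/v1/chat/completions"  # base_url => append server route
    elif is_url:
        model_url = model  # full URL => use as-is
    else:
        model_url = resolve_model_id(model) + "/v1/chat/completions"  # Hub ID => resolve first
    # Payload "model" field: keep a Hub-style "org/name" ID, else the "tgi" placeholder
    model_id = model if not is_url and model.count("/") == 1 else "tgi"
    return model_url, model_id

url, payload_id = resolve_chat_url(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    None,
    lambda m: f"https://api-inference.huggingface.co/models/{m}",  # assumed resolver for illustration
)
# url        => ".../models/meta-llama/Meta-Llama-3-8B-Instruct/v1/chat/completions"
# payload_id => "meta-llama/Meta-Llama-3-8B-Instruct"
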
--- huggingface_hub-0.24.0/src/huggingface_hub/inference/_common.py
+++ huggingface_hub-0.24.1/src/huggingface_hub/inference/_common.py
@@ -34,7 +34,6 @@ from typing import (
     Literal,
     NoReturn,
     Optional,
-    Set,
     Union,
     overload,
 )
@@ -61,8 +60,6 @@ from ..utils import (
 )
 from ._generated.types import (
     ChatCompletionStreamOutput,
-    ChatCompletionStreamOutputChoice,
-    ChatCompletionStreamOutputDelta,
     TextGenerationStreamOutput,
 )

@@ -271,7 +268,10 @@ def _stream_text_generation_response(
    """Used in `InferenceClient.text_generation`."""
    # Parse ServerSentEvents
    for byte_payload in bytes_output_as_lines:
-        output = _format_text_generation_stream_output(byte_payload, details)
+        try:
+            output = _format_text_generation_stream_output(byte_payload, details)
+        except StopIteration:
+            break
        if output is not None:
            yield output

@@ -282,7 +282,10 @@ async def _async_stream_text_generation_response(
    """Used in `AsyncInferenceClient.text_generation`."""
    # Parse ServerSentEvents
    async for byte_payload in bytes_output_as_lines:
-        output = _format_text_generation_stream_output(byte_payload, details)
+        try:
+            output = _format_text_generation_stream_output(byte_payload, details)
+        except StopIteration:
+            break
        if output is not None:
            yield output

@@ -293,6 +296,9 @@ def _format_text_generation_stream_output(
    if not byte_payload.startswith(b"data:"):
        return None  # empty line

+    if byte_payload == b"data: [DONE]":
+        raise StopIteration("[DONE] signal received.")
+
    # Decode payload
    payload = byte_payload.decode("utf-8")
    json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
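
The `[DONE]` check has to run before the JSON decode: TGI's OpenAI-compatible streaming endpoint terminates a stream with a literal `data: [DONE]` line, which is not valid JSON. A quick, self-contained demonstration of what the 0.24.0 parser would hit on that final line (assuming a raw SSE byte line as input):

import json

byte_payload = b"data: [DONE]"
payload = byte_payload.decode("utf-8")
try:
    json.loads(payload.lstrip("data:"))  # lstrip removes the leading "data:" characters
except json.JSONDecodeError as e:
    print(f"0.24.0 would raise here: {e}")  # [DONE] is not JSON
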
@@ -306,72 +312,41 @@ def _format_text_generation_stream_output(
    return output.token.text if not details else output


-def _format_chat_completion_stream_output_from_text_generation(
-    item: TextGenerationStreamOutput, created: int
-) -> ChatCompletionStreamOutput:
-    if item.details is None:
-        # new token generated => return delta
-        return ChatCompletionStreamOutput(
-            # explicitly set 'dummy' values to reduce expectations from users
-            id="dummy",
-            model="dummy",
-            system_fingerprint="dummy",
-            choices=[
-                ChatCompletionStreamOutputChoice(
-                    delta=ChatCompletionStreamOutputDelta(
-                        role="assistant",
-                        content=item.token.text,
-                    ),
-                    finish_reason=None,
-                    index=0,
-                )
-            ],
-            created=created,
-        )
-    else:
-        # generation is completed => return finish reason
-        return ChatCompletionStreamOutput(
-            # explicitly set 'dummy' values to reduce expectations from users
-            id="dummy",
-            model="dummy",
-            system_fingerprint="dummy",
-            choices=[
-                ChatCompletionStreamOutputChoice(
-                    delta=ChatCompletionStreamOutputDelta(role="assistant"),
-                    finish_reason=item.details.finish_reason,
-                    index=0,
-                )
-            ],
-            created=created,
-        )
-
-
-def _stream_chat_completion_response_from_bytes(
+def _stream_chat_completion_response(
    bytes_lines: Iterable[bytes],
 ) -> Iterable[ChatCompletionStreamOutput]:
    """Used in `InferenceClient.chat_completion` if model is served with TGI."""
    for item in bytes_lines:
-        output = _format_chat_completion_stream_output_from_text_generation_from_bytes(item)
+        try:
+            output = _format_chat_completion_stream_output(item)
+        except StopIteration:
+            break
        if output is not None:
            yield output


-async def _async_stream_chat_completion_response_from_bytes(
+async def _async_stream_chat_completion_response(
    bytes_lines: AsyncIterable[bytes],
 ) -> AsyncIterable[ChatCompletionStreamOutput]:
    """Used in `AsyncInferenceClient.chat_completion`."""
    async for item in bytes_lines:
-        output = _format_chat_completion_stream_output_from_text_generation_from_bytes(item)
+        try:
+            output = _format_chat_completion_stream_output(item)
+        except StopIteration:
+            break
        if output is not None:
            yield output


-def _format_chat_completion_stream_output_from_text_generation_from_bytes(
+def _format_chat_completion_stream_output(
    byte_payload: bytes,
 ) -> Optional[ChatCompletionStreamOutput]:
    if not byte_payload.startswith(b"data:"):
        return None  # empty line

+    if byte_payload == b"data: [DONE]":
+        raise StopIteration("[DONE] signal received.")
+
    # Decode payload
    payload = byte_payload.decode("utf-8")
    json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
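
One subtlety in the hunk above: the formatter raises `StopIteration` from a plain function, and each streaming generator catches it explicitly and breaks. The indirection matters because of PEP 479: a `StopIteration` that propagated uncaught inside a generator body would be converted to a `RuntimeError` instead of ending the stream. A self-contained sketch of the pattern, with hypothetical names standing in for the real helpers:

from typing import Iterable, Iterator, Optional

def _format(line: bytes) -> Optional[str]:
    """Hypothetical stand-in for _format_chat_completion_stream_output."""
    if not line.startswith(b"data:"):
        return None  # keep-alive / empty SSE line
    if line == b"data: [DONE]":
        raise StopIteration("[DONE] signal received.")
    return line[len(b"data:"):].strip().decode()

def _stream(lines: Iterable[bytes]) -> Iterator[str]:
    """Hypothetical stand-in for _stream_chat_completion_response."""
    for line in lines:
        try:
            token = _format(line)
        except StopIteration:  # caught here, so it never escapes the generator (PEP 479)
            break
        if token is not None:
            yield token

print(list(_stream([b"data: hello", b"", b"data: world", b"data: [DONE]", b"data: never-read"])))
# ['hello', 'world']
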
@@ -413,17 +388,6 @@ def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
    return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])


-_NON_CHAT_COMPLETION_SERVER: Set[str] = set()
-
-
-def _set_as_non_chat_completion_server(model: str) -> None:
-    _NON_CHAT_COMPLETION_SERVER.add(model)
-
-
-def _is_chat_completion_server(model: str) -> bool:
-    return model not in _NON_CHAT_COMPLETION_SERVER
-
-
 # TEXT GENERATION ERRORS
 # ----------------------
 # Text-generation errors are parsed separately to handle as much as possible the errors returned by the text generation
--- huggingface_hub-0.24.0/src/huggingface_hub/inference/_generated/_async_client.py
+++ huggingface_hub-0.24.1/src/huggingface_hub/inference/_generated/_async_client.py
@@ -44,7 +44,7 @@ from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
     ModelStatus,
-    _async_stream_chat_completion_response_from_bytes,
+    _async_stream_chat_completion_response,
     _async_stream_text_generation_response,
     _b64_encode,
     _b64_to_image,
@@ -54,9 +54,7 @@ from huggingface_hub.inference._common import (
     _fetch_recommended_models,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _is_chat_completion_server,
     _open_as_binary,
-    _set_as_non_chat_completion_server,
     _set_unsupported_text_generation_kwargs,
     raise_text_generation_error,
 )
@@ -68,8 +66,6 @@ from huggingface_hub.inference._generated.types import (
     ChatCompletionInputTool,
     ChatCompletionInputToolTypeClass,
     ChatCompletionOutput,
-    ChatCompletionOutputComplete,
-    ChatCompletionOutputMessage,
     ChatCompletionStreamOutput,
     DocumentQuestionAnsweringOutputElement,
     FillMaskOutputElement,
@@ -174,7 +170,7 @@ class AsyncInferenceClient:
         )

         self.model: Optional[str] = model
-        self.token: Union[str, bool, None] = token or api_key
+        self.token: Union[str, bool, None] = token if token is not None else api_key
         self.headers = CaseInsensitiveDict(build_hf_headers(token=self.token))  # 'authorization' + 'user-agent'
         if headers is not None:
             self.headers.update(headers)
@@ -824,123 +820,52 @@ class AsyncInferenceClient:
         # since `chat_completion(..., model=xxx)` is also a payload parameter for the
         # server, we need to handle it differently
         model = self.base_url or self.model or model or self.get_recommended_model("text-generation")
+        is_url = model.startswith(("http://", "https://"))
+
+        # First, resolve the model chat completions URL
+        if model == self.base_url:
+            # base_url passed => add server route
+            model_url = model + "/v1/chat/completions"
+        elif is_url:
+            # model is a URL => use it directly
+            model_url = model
+        else:
+            # model is a model ID => resolve it + add server route
+            model_url = self._resolve_url(model) + "/v1/chat/completions"
+
+        # `model` is sent in the payload. Not used by the server but can be useful for debugging/routing.
+        # If it's a ID on the Hub => use it. Otherwise, we use a random string.
+        model_id = model if not is_url and model.count("/") == 1 else "tgi"
+
+        data = await self.post(
+            model=model_url,
+            json=dict(
+                model=model_id,
+                messages=messages,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                logprobs=logprobs,
+                max_tokens=max_tokens,
+                n=n,
+                presence_penalty=presence_penalty,
+                response_format=response_format,
+                seed=seed,
+                stop=stop,
+                temperature=temperature,
+                tool_choice=tool_choice,
+                tool_prompt=tool_prompt,
+                tools=tools,
+                top_logprobs=top_logprobs,
+                top_p=top_p,
+                stream=stream,
+            ),
+            stream=stream,
+        )

-        if _is_chat_completion_server(model):
-            # First, let's consider the server has a `/v1/chat/completions` endpoint.
-            # If that's the case, we don't have to render the chat template client-side.
-            model_url = self._resolve_url(model)
-            if not model_url.endswith("/chat/completions"):
-                model_url += "/v1/chat/completions"
-
-            # `model` is sent in the payload. Not used by the server but can be useful for debugging/routing.
-            if not model.startswith("http") and model.count("/") == 1:
-                # If it's a ID on the Hub => use it
-                model_id = model
-            else:
-                # Otherwise, we use a random string
-                model_id = "tgi"
-
-            try:
-                data = await self.post(
-                    model=model_url,
-                    json=dict(
-                        model=model_id,
-                        messages=messages,
-                        frequency_penalty=frequency_penalty,
-                        logit_bias=logit_bias,
-                        logprobs=logprobs,
-                        max_tokens=max_tokens,
-                        n=n,
-                        presence_penalty=presence_penalty,
-                        response_format=response_format,
-                        seed=seed,
-                        stop=stop,
-                        temperature=temperature,
-                        tool_choice=tool_choice,
-                        tool_prompt=tool_prompt,
-                        tools=tools,
-                        top_logprobs=top_logprobs,
-                        top_p=top_p,
-                        stream=stream,
-                    ),
-                    stream=stream,
-                )
-            except _import_aiohttp().ClientResponseError as e:
-                if e.status in (400, 404, 500):
-                    # Let's consider the server is not a chat completion server.
-                    # Then we call again `chat_completion` which will render the chat template client side.
-                    # (can be HTTP 500, HTTP 400, HTTP 404 depending on the server)
-                    _set_as_non_chat_completion_server(model)
-                    logger.warning(
-                        f"Server {model_url} does not seem to support chat completion. Falling back to text generation. Error: {e}"
-                    )
-                    return await self.chat_completion(
-                        messages=messages,
-                        model=model,
-                        stream=stream,
-                        max_tokens=max_tokens,
-                        seed=seed,
-                        stop=stop,
-                        temperature=temperature,
-                        top_p=top_p,
-                    )
-                raise
-
-            if stream:
-                return _async_stream_chat_completion_response_from_bytes(data)  # type: ignore[arg-type]
-
-            return ChatCompletionOutput.parse_obj_as_instance(data)  # type: ignore[arg-type]
-
-        # At this point, we know the server is not a chat completion server.
-        # It means it's a transformers-backed server for which we can send a list of messages directly to the
-        # `text-generation` pipeline. We won't receive a detailed response but only the generated text.
         if stream:
-            raise ValueError(
-                "Streaming token is not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. Please pass `stream=False` as input."
-            )
-        if tool_choice is not None or tool_prompt is not None or tools is not None:
-            warnings.warn(
-                "Tools are not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. The provided tool parameters will be ignored."
-            )
-        if response_format is not None:
-            warnings.warn(
-                "Response format is not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. The provided response format will be ignored."
-            )
+            return _async_stream_chat_completion_response(data)  # type: ignore[arg-type]

-        # generate response
-        text_generation_output = await self.text_generation(
-            prompt=messages,  # type: ignore # Not correct type but works implicitly
-            model=model,
-            stream=False,
-            details=False,
-            max_new_tokens=max_tokens,
-            seed=seed,
-            stop_sequences=stop,
-            temperature=temperature,
-            top_p=top_p,
-        )
-
-        # Format as a ChatCompletionOutput with dummy values for fields we can't provide
-        return ChatCompletionOutput(
-            id="dummy",
-            model="dummy",
-            system_fingerprint="dummy",
-            usage=None,  # type: ignore # set to `None` as we don't want to provide false information
-            created=int(time.time()),
-            choices=[
-                ChatCompletionOutputComplete(
-                    finish_reason="unk",  # type: ignore # set to `unk` as we don't want to provide false information
-                    index=0,
-                    message=ChatCompletionOutputMessage(
-                        content=text_generation_output,
-                        role="assistant",
-                    ),
-                )
-            ],
-        )
+        return ChatCompletionOutput.parse_obj_as_instance(data)  # type: ignore[arg-type]

     async def conversational(
         self,
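
For context, a hedged usage sketch of the streaming path these async changes affect (the model is whatever the client resolves by default; running it requires network access and, for gated models, a token):

import asyncio
from huggingface_hub import AsyncInferenceClient

async def main() -> None:
    client = AsyncInferenceClient()  # or AsyncInferenceClient(base_url="http://localhost:8080")
    stream = await client.chat_completion(
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        max_tokens=32,
        stream=True,  # as of 0.24.1, iteration ends cleanly on the server's [DONE] line
    )
    async for chunk in stream:
        print(chunk.choices[0].delta.content or "", end="")

asyncio.run(main())
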
--- huggingface_hub-0.24.0/src/huggingface_hub.egg-info/PKG-INFO
+++ huggingface_hub-0.24.1/src/huggingface_hub.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: huggingface-hub
-Version: 0.24.0
+Version: 0.24.1
 Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
 Home-page: https://github.com/huggingface/huggingface_hub
 Author: Hugging Face, Inc.