huggingface-hub 0.30.1.tar.gz → 0.31.0.tar.gz

This diff compares the contents of two package versions as published to a public registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.

Files changed (143)
  1. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/PKG-INFO +1 -1
  2. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/setup.py +2 -1
  3. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/__init__.py +1 -1
  4. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_commit_api.py +23 -4
  5. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_inference_endpoints.py +8 -5
  6. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_snapshot_download.py +2 -1
  7. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_space_api.py +0 -5
  8. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_upload_large_folder.py +26 -3
  9. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/upload.py +2 -1
  10. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/constants.py +1 -0
  11. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/file_download.py +58 -10
  12. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hf_api.py +81 -15
  13. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_client.py +105 -150
  14. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/_async_client.py +105 -150
  15. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +2 -3
  16. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/chat_completion.py +3 -3
  17. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
  18. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_generation.py +1 -1
  19. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
  20. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
  21. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/__init__.py +55 -17
  22. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/_common.py +34 -19
  23. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/black_forest_labs.py +4 -1
  24. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/fal_ai.py +36 -11
  25. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/hf_inference.py +33 -11
  26. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/hyperbolic.py +5 -1
  27. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/nebius.py +15 -1
  28. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/novita.py +14 -1
  29. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/openai.py +3 -2
  30. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/replicate.py +22 -3
  31. huggingface_hub-0.31.0/src/huggingface_hub/inference/_providers/sambanova.py +28 -0
  32. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/together.py +15 -1
  33. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repocard_data.py +24 -4
  34. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_pagination.py +2 -2
  35. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_runtime.py +4 -0
  36. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_xet.py +1 -12
  37. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/PKG-INFO +1 -1
  38. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/requires.txt +4 -1
  39. huggingface_hub-0.30.1/src/huggingface_hub/inference/_providers/sambanova.py +0 -6
  40. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/LICENSE +0 -0
  41. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/MANIFEST.in +0 -0
  42. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/README.md +0 -0
  43. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/pyproject.toml +0 -0
  44. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/setup.cfg +0 -0
  45. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_commit_scheduler.py +0 -0
  46. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_local_folder.py +0 -0
  47. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_login.py +0 -0
  48. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_tensorboard_logger.py +0 -0
  49. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_webhooks_payload.py +0 -0
  50. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_webhooks_server.py +0 -0
  51. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/__init__.py +0 -0
  52. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/_cli_utils.py +0 -0
  53. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/delete_cache.py +0 -0
  54. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/download.py +0 -0
  55. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/env.py +0 -0
  56. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/huggingface_cli.py +0 -0
  57. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/lfs.py +0 -0
  58. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/repo_files.py +0 -0
  59. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/scan_cache.py +0 -0
  60. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/tag.py +0 -0
  61. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/upload_large_folder.py +0 -0
  62. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/user.py +0 -0
  63. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/version.py +0 -0
  64. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/community.py +0 -0
  65. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/errors.py +0 -0
  66. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/fastai_utils.py +0 -0
  67. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hf_file_system.py +0 -0
  68. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hub_mixin.py +0 -0
  69. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/__init__.py +0 -0
  70. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_common.py +0 -0
  71. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/__init__.py +0 -0
  72. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/__init__.py +0 -0
  73. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/audio_classification.py +0 -0
  74. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/audio_to_audio.py +0 -0
  75. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/base.py +0 -0
  76. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/depth_estimation.py +0 -0
  77. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/document_question_answering.py +0 -0
  78. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/feature_extraction.py +0 -0
  79. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/fill_mask.py +0 -0
  80. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_classification.py +0 -0
  81. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_segmentation.py +0 -0
  82. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_to_image.py +0 -0
  83. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/object_detection.py +0 -0
  84. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/question_answering.py +0 -0
  85. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/sentence_similarity.py +0 -0
  86. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/summarization.py +0 -0
  87. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/table_question_answering.py +0 -0
  88. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text2text_generation.py +0 -0
  89. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_classification.py +0 -0
  90. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_image.py +0 -0
  91. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_video.py +0 -0
  92. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/token_classification.py +0 -0
  93. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/translation.py +0 -0
  94. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/video_classification.py +0 -0
  95. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/visual_question_answering.py +0 -0
  96. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_classification.py +0 -0
  97. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +0 -0
  98. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +0 -0
  99. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/cerebras.py +0 -0
  100. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/cohere.py +0 -0
  101. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/fireworks_ai.py +0 -0
  102. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference_api.py +0 -0
  103. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/keras_mixin.py +0 -0
  104. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/lfs.py +0 -0
  105. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/py.typed +0 -0
  106. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repocard.py +0 -0
  107. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repository.py +0 -0
  108. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/__init__.py +0 -0
  109. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_base.py +0 -0
  110. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_dduf.py +0 -0
  111. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_tensorflow.py +0 -0
  112. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_torch.py +0 -0
  113. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/templates/datasetcard_template.md +0 -0
  114. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/templates/modelcard_template.md +0 -0
  115. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/__init__.py +0 -0
  116. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_auth.py +0 -0
  117. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_cache_assets.py +0 -0
  118. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_cache_manager.py +0 -0
  119. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_chunk_utils.py +0 -0
  120. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_datetime.py +0 -0
  121. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_deprecation.py +0 -0
  122. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_experimental.py +0 -0
  123. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_fixes.py +0 -0
  124. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_git_credential.py +0 -0
  125. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_headers.py +0 -0
  126. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_hf_folder.py +0 -0
  127. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_http.py +0 -0
  128. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_lfs.py +0 -0
  129. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_paths.py +0 -0
  130. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_safetensors.py +0 -0
  131. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_subprocess.py +0 -0
  132. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_telemetry.py +0 -0
  133. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_typing.py +0 -0
  134. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_validators.py +0 -0
  135. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/endpoint_helpers.py +0 -0
  136. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/insecure_hashlib.py +0 -0
  137. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/logging.py +0 -0
  138. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/sha.py +0 -0
  139. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/tqdm.py +0 -0
  140. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/SOURCES.txt +0 -0
  141. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/dependency_links.txt +0 -0
  142. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/entry_points.txt +0 -0
  143. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: huggingface_hub
-Version: 0.30.1
+Version: 0.31.0
 Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
 Home-page: https://github.com/huggingface/huggingface_hub
 Author: Hugging Face, Inc.
setup.py

@@ -14,6 +14,7 @@ def get_version() -> str:
 install_requires = [
     "filelock",
     "fsspec>=2023.5.0",
+    "hf-xet>=1.1.0,<2.0.0; platform_machine=='x86_64' or platform_machine=='amd64' or platform_machine=='arm64' or platform_machine=='aarch64'",
     "packaging>=20.9",
     "pyyaml>=5.1",
     "requests",
@@ -55,7 +56,7 @@ extras["tensorflow-testing"] = [
     "keras<3.0",
 ]
 
-extras["hf_xet"] = ["hf_xet>=0.1.4"]
+extras["hf_xet"] = ["hf_xet>=1.1.0,<2.0.0"]
 
 extras["testing"] = (
     extras["cli"]
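The new hf-xet requirement is gated behind a PEP 508 environment marker, so it is only pulled in on 64-bit x86 and ARM machines. As a quick illustration of how such a marker evaluates, here is a minimal sketch using the `packaging` library (already a dependency of huggingface_hub):

    from packaging.markers import Marker

    # The same marker string as in install_requires above.
    marker = Marker(
        "platform_machine=='x86_64' or platform_machine=='amd64' "
        "or platform_machine=='arm64' or platform_machine=='aarch64'"
    )
    # True on most 64-bit x86/ARM machines, False elsewhere (e.g. 32-bit ARM).
    print(marker.evaluate())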
src/huggingface_hub/__init__.py

@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING
 
 
-__version__ = "0.30.1"
+__version__ = "0.31.0"
 
 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
src/huggingface_hub/_commit_api.py

@@ -530,7 +530,7 @@ def _upload_xet_files(
     if len(additions) == 0:
         return
     # at this point, we know that hf_xet is installed
-    from hf_xet import upload_files
+    from hf_xet import upload_bytes, upload_files
 
     try:
         xet_connection_info = fetch_xet_connection_info_from_repo_info(
@@ -571,8 +571,10 @@ def _upload_xet_files(
     num_chunks_num_digits = int(math.log10(num_chunks)) + 1
     for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
         _chunk = [op for op in chunk]
-        paths = [str(op.path_or_fileobj) for op in _chunk]
-        expected_size = sum([os.path.getsize(path) for path in paths])
+
+        bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
+        paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
+        expected_size = sum(op.upload_info.size for op in bytes_ops + paths_ops)
 
         if num_chunks > 1:
             description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
@@ -592,7 +594,24 @@ def _upload_xet_files(
         def update_progress(increment: int):
             progress.update(increment)
 
-        upload_files(paths, xet_endpoint, access_token_info, token_refresher, update_progress, repo_type)
+        if len(paths_ops) > 0:
+            upload_files(
+                [str(op.path_or_fileobj) for op in paths_ops],
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                update_progress,
+                repo_type,
+            )
+        if len(bytes_ops) > 0:
+            upload_bytes(
+                [op.path_or_fileobj for op in bytes_ops],
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                update_progress,
+                repo_type,
+            )
     return
 
 
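With `upload_bytes` available, in-memory `bytes` payloads can now take the Xet path instead of falling back to HTTP upload. A hedged usage sketch (repo id and token are placeholders; assumes the target repo is Xet-enabled and `hf_xet` is installed):

    from huggingface_hub import HfApi

    api = HfApi(token="hf_xxx")  # placeholder token

    # In 0.31.0 this bytes payload is routed through hf_xet.upload_bytes;
    # in 0.30.x it would have gone through the regular HTTP upload instead.
    api.upload_file(
        path_or_fileobj=b"hello from memory",
        path_in_repo="data/hello.txt",
        repo_id="user/my-repo",  # placeholder repo
    )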
src/huggingface_hub/_inference_endpoints.py

@@ -6,14 +6,13 @@ from typing import TYPE_CHECKING, Dict, Optional, Union
 
 from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
 
-from .inference._client import InferenceClient
-from .inference._generated._async_client import AsyncInferenceClient
 from .utils import get_session, logging, parse_datetime
 
 
 if TYPE_CHECKING:
     from .hf_api import HfApi
-
+    from .inference._client import InferenceClient
+    from .inference._generated._async_client import AsyncInferenceClient
 
 logger = logging.get_logger(__name__)
 
@@ -138,7 +137,7 @@ class InferenceEndpoint:
         self._populate_from_raw()
 
     @property
-    def client(self) -> InferenceClient:
+    def client(self) -> "InferenceClient":
         """Returns a client to make predictions on this Inference Endpoint.
 
         Returns:
@@ -152,13 +151,15 @@ class InferenceEndpoint:
                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
             )
+        from .inference._client import InferenceClient
+
         return InferenceClient(
             model=self.url,
             token=self._token,  # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok.
         )
 
     @property
-    def async_client(self) -> AsyncInferenceClient:
+    def async_client(self) -> "AsyncInferenceClient":
         """Returns a client to make predictions on this Inference Endpoint.
 
         Returns:
@@ -172,6 +173,8 @@ class InferenceEndpoint:
                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
             )
+        from .inference._generated._async_client import AsyncInferenceClient
+
         return AsyncInferenceClient(
             model=self.url,
             token=self._token,  # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok.
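The change above is the standard lazy-import pattern: annotations reference the client classes as strings under `TYPE_CHECKING`, and the real import is deferred until a property is first accessed, keeping module import time down. A minimal self-contained sketch of the pattern (module and class names are made up):

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen by static type checkers only, never executed at runtime.
        from expensive_module import ExpensiveClient  # hypothetical module


    class Endpoint:
        @property
        def client(self) -> "ExpensiveClient":
            # Deferred import: the cost is paid on first access, not at import time.
            from expensive_module import ExpensiveClient

            return ExpensiveClient()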
src/huggingface_hub/_snapshot_download.py

@@ -200,12 +200,13 @@ def snapshot_download(
             commit_hash = f.read()
 
     # Try to locate snapshot folder for this commit hash
-    if commit_hash is not None:
+    if commit_hash is not None and local_dir is None:
         snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
         if os.path.exists(snapshot_folder):
             # Snapshot folder exists => let's return it
             # (but we can't check if all the files are actually there)
             return snapshot_folder
+
     # If local_dir is not None, return it if it exists and is not empty
     if local_dir is not None:
         local_dir = Path(local_dir)
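A plausible reading of this fix: when `local_dir` is passed, a locally cached commit hash no longer short-circuits `snapshot_download` into returning a cache snapshot folder; resolution proceeds against the local directory instead. A hedged usage sketch (repo id is a placeholder):

    from huggingface_hub import snapshot_download

    # With 0.31.0, passing local_dir means the returned path points at
    # ./my-model rather than at a snapshot folder inside the HF cache.
    path = snapshot_download(repo_id="user/my-model", local_dir="./my-model")
    print(path)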
src/huggingface_hub/_space_api.py

@@ -81,11 +81,6 @@ class SpaceHardware(str, Enum):
     H100 = "h100"
     H100X8 = "h100x8"
 
-    # TPU
-    V5E_1X1 = "v5e-1x1"
-    V5E_2X2 = "v5e-2x2"
-    V5E_2X4 = "v5e-2x4"
-
 
 class SpaceStorage(str, Enum):
     """
src/huggingface_hub/_upload_large_folder.py

@@ -44,6 +44,7 @@ logger = logging.getLogger(__name__)
 WAITING_TIME_IF_NO_TASKS = 10  # seconds
 MAX_NB_REGULAR_FILES_PER_COMMIT = 75
 MAX_NB_LFS_FILES_PER_COMMIT = 150
+COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
 
 
 def upload_large_folder_internal(
@@ -184,6 +185,8 @@ class LargeUploadStatus:
         self.last_commit_attempt: Optional[float] = None
 
         self._started_at = datetime.now()
+        self._chunk_idx: int = 1
+        self._chunk_lock: Lock = Lock()
 
         # Setup queues
         for item in self.items:
@@ -199,6 +202,21 @@ class LargeUploadStatus:
             else:
                 logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")
 
+    def target_chunk(self) -> int:
+        with self._chunk_lock:
+            return COMMIT_SIZE_SCALE[self._chunk_idx]
+
+    def update_chunk(self, success: bool, nb_items: int, duration: float) -> None:
+        with self._chunk_lock:
+            if not success:
+                logger.warning(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.")
+                self._chunk_idx -= 1
+            elif nb_items >= COMMIT_SIZE_SCALE[self._chunk_idx] and duration < 40:
+                logger.info(f"Successfully committed {nb_items} at once. Increasing the limit for next batch.")
+                self._chunk_idx += 1
+
+            self._chunk_idx = max(0, min(self._chunk_idx, len(COMMIT_SIZE_SCALE) - 1))
+
     def current_report(self) -> str:
         """Generate a report of the current status of the large upload."""
         nb_hashed = 0
@@ -351,6 +369,8 @@ def _worker_job(
             status.nb_workers_preupload_lfs -= 1
 
         elif job == WorkerJob.COMMIT:
+            start_ts = time.time()
+            success = True
             try:
                 _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
             except KeyboardInterrupt:
@@ -360,6 +380,9 @@ def _worker_job(
                 traceback.format_exc()
                 for item in items:
                     status.queue_commit.put(item)
+                success = False
+            duration = time.time() - start_ts
+            status.update_chunk(success, len(items), duration)
             with status.lock:
                 status.last_commit_attempt = time.time()
                 status.nb_workers_commit -= 1
@@ -393,7 +416,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() >= 10:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode (>10 files ready)")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
     elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
@@ -411,7 +434,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode (no other worker getting upload mode)")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 7. Preupload LFS file if at least 1 file
     # Skip if hf_transfer is enabled and there is already a worker preuploading LFS
@@ -432,7 +455,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() > 0:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 10. Commit if at least 1 file and 1 min since last commit attempt
     elif (
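Taken together, the commit batch size now follows an increase/back-off ladder over `COMMIT_SIZE_SCALE`: a full batch committed in under 40 seconds moves the index up one step, a failed commit moves it down, and the index is clamped to the table. A self-contained sketch of that behavior (the thread lock from the real class is omitted for brevity):

    COMMIT_SIZE_SCALE = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]


    class ChunkLadder:
        def __init__(self) -> None:
            self.idx = 1  # start at 50 files per commit, as above

        def target(self) -> int:
            return COMMIT_SIZE_SCALE[self.idx]

        def update(self, success: bool, nb_items: int, duration: float) -> None:
            if not success:
                self.idx -= 1  # back off after a failed commit
            elif nb_items >= COMMIT_SIZE_SCALE[self.idx] and duration < 40:
                self.idx += 1  # full, fast batch => try a bigger one next time
            self.idx = max(0, min(self.idx, len(COMMIT_SIZE_SCALE) - 1))


    ladder = ChunkLadder()
    ladder.update(success=True, nb_items=50, duration=12.0)
    print(ladder.target())  # 75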
src/huggingface_hub/commands/upload.py

@@ -59,6 +59,7 @@ from huggingface_hub.constants import HF_HUB_ENABLE_HF_TRANSFER
 from huggingface_hub.errors import RevisionNotFoundError
 from huggingface_hub.hf_api import HfApi
 from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
+from huggingface_hub.utils._runtime import is_xet_available
 
 
 logger = logging.get_logger(__name__)
@@ -215,7 +216,7 @@ class UploadCommand(BaseHuggingfaceCLICommand):
         if self.delete is not None and len(self.delete) > 0:
             warnings.warn("Ignoring `--delete` since a single file is uploaded.")
 
-        if not HF_HUB_ENABLE_HF_TRANSFER:
+        if not is_xet_available() and not HF_HUB_ENABLE_HF_TRANSFER:
             logger.info(
                 "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See"
                 " https://huggingface.co/docs/huggingface_hub/hf_transfer for more details."
src/huggingface_hub/constants.py

@@ -36,6 +36,7 @@ DEFAULT_DOWNLOAD_TIMEOUT = 10
 DEFAULT_REQUEST_TIMEOUT = 10
 DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
 HF_TRANSFER_CONCURRENCY = 100
+MAX_HTTP_DOWNLOAD_SIZE = 50 * 1000 * 1000 * 1000  # 50 GB
 
 # Constants for serialization
src/huggingface_hub/file_download.py

@@ -44,7 +44,6 @@ from .utils import (
     get_graphviz_version,  # noqa: F401 # for backward compatibility
     get_jinja_version,  # noqa: F401 # for backward compatibility
     get_pydot_version,  # noqa: F401 # for backward compatibility
-    get_session,
     get_tf_version,  # noqa: F401 # for backward compatibility
     get_torch_version,  # noqa: F401 # for backward compatibility
     hf_raise_for_status,
@@ -62,7 +61,7 @@ from .utils import (
     tqdm,
     validate_hf_hub_args,
 )
-from .utils._http import _adjust_range_header
+from .utils._http import _adjust_range_header, http_backoff
 from .utils._runtime import _PY_VERSION, is_xet_available  # noqa: F401 # for backward compatibility
 from .utils._typing import HTTP_METHOD_T
 from .utils.sha import sha_fileobj
@@ -268,6 +267,8 @@ def _request_wrapper(
     """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
     `allow_redirection=False`.
 
+    A backoff mechanism retries the HTTP call on 429, 503 and 504 errors.
+
     Args:
         method (`str`):
             HTTP method, such as 'GET' or 'HEAD'.
@@ -305,11 +306,40 @@ def _request_wrapper(
         return response
 
     # Perform request and return if status_code is not in the retry list.
-    response = get_session().request(method=method, url=url, **params)
+    response = http_backoff(method=method, url=url, **params, retry_on_exceptions=(), retry_on_status_codes=(429,))
     hf_raise_for_status(response)
     return response
 
 
+def _get_file_length_from_http_response(response: requests.Response) -> Optional[int]:
+    """
+    Get the length of the file from the HTTP response headers.
+
+    This function extracts the file size from the HTTP response headers, either from the
+    `Content-Range` or `Content-Length` header, if available (in that order).
+
+    Args:
+        response (`requests.Response`):
+            The HTTP response object.
+
+    Returns:
+        `int` or `None`: The length of the file in bytes if the information is available, otherwise `None`.
+    """
+    content_range = response.headers.get("Content-Range")
+    if content_range is not None:
+        return int(content_range.rsplit("/")[-1])
+
+    content_length = response.headers.get("Content-Length")
+    if content_length is not None:
+        return int(content_length)
+
+    return None
+
+
 def http_get(
     url: str,
     temp_file: BinaryIO,
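Checking `Content-Range` before `Content-Length` matters for ranged requests: the total file size sits after the `/` in `Content-Range`, while `Content-Length` only reports the size of the returned slice. A hedged sketch with plain `requests` (placeholder URL):

    import requests

    # Ask for a single byte; a range-aware server replies with e.g.
    # "Content-Range: bytes 0-0/134217728" while Content-Length is just 1.
    resp = requests.get(
        "https://example.com/big-file.bin",  # placeholder URL
        headers={"Range": "bytes=0-0"},
    )
    content_range = resp.headers.get("Content-Range")
    if content_range is not None:
        print(int(content_range.rsplit("/")[-1]))  # total size in bytes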
@@ -352,12 +382,15 @@ def http_get(
         # If the file is already fully downloaded, we don't need to download it again.
         return
 
+    has_custom_range_header = headers is not None and any(h.lower() == "range" for h in headers)
     hf_transfer = None
     if constants.HF_HUB_ENABLE_HF_TRANSFER:
         if resume_size != 0:
             warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
         elif proxies is not None:
             warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
+        elif has_custom_range_header:
+            warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method")
         else:
             try:
                 import hf_transfer  # type: ignore[no-redef]
@@ -372,12 +405,24 @@ def http_get(
     headers = copy.deepcopy(headers) or {}
     if resume_size > 0:
         headers["Range"] = _adjust_range_header(headers.get("Range"), resume_size)
+    elif expected_size and expected_size > constants.MAX_HTTP_DOWNLOAD_SIZE:
+        # Any files over 50GB will not be available through basic http request.
+        # Setting the range header to 0-0 will force the server to return the file size in the Content-Range header.
+        # Since hf_transfer splits the download into chunks, the process will succeed afterwards.
+        if hf_transfer:
+            headers["Range"] = "bytes=0-0"
+        else:
+            raise ValueError(
+                "The file is too large to be downloaded using the regular download method. Use `hf_transfer` or `hf_xet` instead."
+                " Try `pip install hf_transfer` or `pip install hf_xet`."
+            )
 
     r = _request_wrapper(
         method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
     )
+
     hf_raise_for_status(r)
-    content_length = r.headers.get("Content-Length")
+    content_length = _get_file_length_from_http_response(r)
 
     # NOTE: 'total' is the total number of bytes to download, not the number of bytes in the file.
     # If the file is compressed, the number of bytes in the saved file will be higher than 'total'.
@@ -425,7 +470,7 @@ def http_get(
                 filename=temp_file.name,
                 max_files=constants.HF_TRANSFER_CONCURRENCY,
                 chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
-                headers=headers,
+                headers=initial_headers,
                 parallel_failures=3,
                 max_retries=5,
                 **({"callback": progress.update} if supports_callback else {}),
@@ -537,11 +582,11 @@ def xet_get(
 
     """
     try:
-        from hf_xet import PyPointerFile, download_files  # type: ignore[no-redef]
+        from hf_xet import PyXetDownloadInfo, download_files  # type: ignore[no-redef]
     except ImportError:
         raise ValueError(
             "To use optimized download using Xet storage, you need to install the hf_xet package. "
-            "Try `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`."
+            'Try `pip install "huggingface_hub[hf_xet]"` or `pip install hf_xet`.'
         )
 
     connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
@@ -552,8 +597,10 @@ def xet_get(
             raise ValueError("Failed to refresh token using xet metadata.")
         return connection_info.access_token, connection_info.expiration_unix_epoch
 
-    pointer_files = [
-        PyPointerFile(path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, filesize=expected_size)
+    xet_download_info = [
+        PyXetDownloadInfo(
+            destination_path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, file_size=expected_size
+        )
     ]
 
     if not displayed_filename:
@@ -578,7 +625,7 @@ def xet_get(
         progress.update(progress_bytes)
 
     download_files(
-        pointer_files,
+        xet_download_info,
         endpoint=connection_info.endpoint,
         token_info=(connection_info.access_token, connection_info.expiration_unix_epoch),
         token_refresher=token_refresher,
@@ -1672,6 +1719,7 @@ def _download_to_tmp_and_move(
             "Falling back to regular HTTP download. "
             "For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`"
         )
+
     http_get(
         url_to_download,
         f,
src/huggingface_hub/hf_api.py

@@ -708,14 +708,21 @@ class RepoFolder:
 
 @dataclass
 class InferenceProviderMapping:
+    hf_model_id: str
     status: Literal["live", "staging"]
     provider_id: str
     task: str
 
+    adapter: Optional[str] = None
+    adapter_weights_path: Optional[str] = None
+
     def __init__(self, **kwargs):
+        self.hf_model_id = kwargs.pop("hf_model_id")
         self.status = kwargs.pop("status")
         self.provider_id = kwargs.pop("providerId")
         self.task = kwargs.pop("task")
+        self.adapter = kwargs.pop("adapter", None)
+        self.adapter_weights_path = kwargs.pop("adapterWeightsPath", None)
         self.__dict__.update(**kwargs)
 
 
@@ -847,7 +854,9 @@ class ModelInfo:
         self.inference_provider_mapping = kwargs.pop("inferenceProviderMapping", None)
         if self.inference_provider_mapping:
             self.inference_provider_mapping = {
-                provider: InferenceProviderMapping(**value)
+                provider: InferenceProviderMapping(
+                    **{**value, "hf_model_id": self.id}
+                )  # little hack to simplify Inference Providers logic
                 for provider, value in self.inference_provider_mapping.items()
            }
 
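For illustration, this is roughly how a raw mapping entry from the Hub API is now enriched with the model id before parsing (all values below are made up; the camelCase keys match those popped in `__init__`):

    from huggingface_hub.hf_api import InferenceProviderMapping

    raw = {
        "status": "live",
        "providerId": "some-provider/model-slug",
        "task": "text-generation",
        "adapter": "lora",
        "adapterWeightsPath": "adapter_model.safetensors",
    }

    # hf_model_id is injected from ModelInfo.id rather than returned by the API:
    mapping = InferenceProviderMapping(**{**raw, "hf_model_id": "user/my-model"})
    print(mapping.hf_model_id, mapping.provider_id, mapping.adapter)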
@@ -4466,18 +4475,17 @@ class HfApi:
             expand="xetEnabled",
             token=token,
         ).xet_enabled
-        has_binary_data = any(
-            isinstance(addition.path_or_fileobj, (bytes, io.BufferedIOBase))
-            for addition in new_lfs_additions_to_upload
+        has_buffered_io_data = any(
+            isinstance(addition.path_or_fileobj, io.BufferedIOBase) for addition in new_lfs_additions_to_upload
         )
-        if xet_enabled and not has_binary_data and is_xet_available():
+        if xet_enabled and not has_buffered_io_data and is_xet_available():
             logger.info("Uploading files using Xet Storage..")
             _upload_xet_files(**upload_kwargs, create_pr=create_pr)  # type: ignore [arg-type]
         else:
             if xet_enabled and is_xet_available():
-                if has_binary_data:
+                if has_buffered_io_data:
                     logger.warning(
-                        "Uploading files as bytes or binary IO objects is not supported by Xet Storage. "
+                        "Uploading files as a binary IO buffer is not supported by Xet Storage. "
                         "Falling back to HTTP upload."
                     )
             _upload_lfs_files(**upload_kwargs, num_threads=num_threads)  # type: ignore [arg-type]
@@ -7564,8 +7572,13 @@ class HfApi:
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
+        env: Optional[Dict[str, str]] = None,
         secrets: Optional[Dict[str, str]] = None,
         type: InferenceEndpointType = InferenceEndpointType.PROTECTED,
+        domain: Optional[str] = None,
+        path: Optional[str] = None,
+        cache_http_responses: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
         namespace: Optional[str] = None,
         token: Union[bool, str, None] = None,
     ) -> InferenceEndpoint:
@@ -7603,10 +7616,20 @@ class HfApi:
             custom_image (`Dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            env (`Dict[str, str]`, *optional*):
+                Non-secret environment variables to inject in the container environment.
             secrets (`Dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
             type ([`InferenceEndpointType`], *optional*):
                 The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`.
+            domain (`str`, *optional*):
+                The custom domain for the Inference Endpoint deployment. If set, the Inference Endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
+            path (`str`, *optional*):
+                The custom path to the deployed model; it should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).
+            cache_http_responses (`bool`, *optional*):
+                Whether to cache HTTP responses from the Inference Endpoint. Defaults to `False`.
+            tags (`List[str]`, *optional*):
+                A list of tags to associate with the Inference Endpoint.
             namespace (`str`, *optional*):
                 The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.
             token (Union[bool, str, None], optional):
@@ -7657,17 +7680,18 @@ class HfApi:
         ...     type="protected",
         ...     instance_size="x1",
         ...     instance_type="nvidia-a10g",
+        ...     env={
+        ...         "MAX_BATCH_PREFILL_TOKENS": "2048",
+        ...         "MAX_INPUT_LENGTH": "1024",
+        ...         "MAX_TOTAL_TOKENS": "1512",
+        ...         "MODEL_ID": "/repository"
+        ...     },
         ...     custom_image={
         ...         "health_route": "/health",
-        ...         "env": {
-        ...             "MAX_BATCH_PREFILL_TOKENS": "2048",
-        ...             "MAX_INPUT_LENGTH": "1024",
-        ...             "MAX_TOTAL_TOKENS": "1512",
-        ...             "MODEL_ID": "/repository"
-        ...         },
         ...         "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
         ...     },
         ...     secrets={"MY_SECRET_KEY": "secret_value"},
+        ...     tags=["dev", "text-generation"],
         ... )
 
         ```
@@ -7701,8 +7725,21 @@ class HfApi:
             },
             "type": type,
         }
+        if env:
+            payload["model"]["env"] = env
         if secrets:
             payload["model"]["secrets"] = secrets
+        if domain is not None or path is not None:
+            payload["route"] = {}
+            if domain is not None:
+                payload["route"]["domain"] = domain
+            if path is not None:
+                payload["route"]["path"] = path
+        if cache_http_responses is not None:
+            payload["cacheHttpResponses"] = cache_http_responses
+        if tags is not None:
+            payload["tags"] = tags
+
         response = get_session().post(
             f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
             headers=self._build_hf_headers(token=token),
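Put together, a 0.31.0-style call using the new parameters might look like this (a hedged sketch; the endpoint name, repository, and route values are placeholders):

    from huggingface_hub import HfApi

    api = HfApi()
    endpoint = api.create_inference_endpoint(
        "my-endpoint-name",            # placeholder
        repository="gpt2",
        framework="pytorch",
        accelerator="cpu",
        instance_size="x2",
        instance_type="intel-icl",
        region="us-east-1",
        vendor="aws",
        task="text-generation",
        env={"LOG_LEVEL": "debug"},    # new in 0.31.0
        domain="models.example.com",   # new: custom route domain
        path="/models/gpt2",           # new: custom route path
        tags=["dev"],                  # new
    )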
@@ -7864,15 +7901,21 @@ class HfApi:
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
+        env: Optional[Dict[str, str]] = None,
         secrets: Optional[Dict[str, str]] = None,
+        # Route update
+        domain: Optional[str] = None,
+        path: Optional[str] = None,
         # Other
+        cache_http_responses: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
         namespace: Optional[str] = None,
         token: Union[bool, str, None] = None,
     ) -> InferenceEndpoint:
         """Update an Inference Endpoint.
 
-        This method allows the update of either the compute configuration, the deployed model, or both. All arguments are
-        optional but at least one must be provided.
+        This method allows the update of either the compute configuration, the deployed model, the route, or any combination.
+        All arguments are optional but at least one must be provided.
 
         For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`].
 
@@ -7904,8 +7947,21 @@ class HfApi:
             custom_image (`Dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            env (`Dict[str, str]`, *optional*):
+                Non-secret environment variables to inject in the container environment.
             secrets (`Dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
+
+            domain (`str`, *optional*):
+                The custom domain for the Inference Endpoint deployment. If set, the Inference Endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
+            path (`str`, *optional*):
+                The custom path to the deployed model; it should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).
+
+            cache_http_responses (`bool`, *optional*):
+                Whether to cache HTTP responses from the Inference Endpoint.
+            tags (`List[str]`, *optional*):
+                A list of tags to associate with the Inference Endpoint.
+
             namespace (`str`, *optional*):
                 The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace.
             token (Union[bool, str, None], optional):
@@ -7943,8 +7999,18 @@ class HfApi:
             payload["model"]["task"] = task
         if custom_image is not None:
             payload["model"]["image"] = {"custom": custom_image}
+        if env is not None:
+            payload["model"]["env"] = env
         if secrets is not None:
             payload["model"]["secrets"] = secrets
+        if domain is not None:
+            payload["route"]["domain"] = domain
+        if path is not None:
+            payload["route"]["path"] = path
+        if cache_http_responses is not None:
+            payload["cacheHttpResponses"] = cache_http_responses
+        if tags is not None:
+            payload["tags"] = tags
 
         response = get_session().put(
             f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}",
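And a matching update call exercising the new arguments (a hedged sketch with placeholder values, continuing the `api` object from the previous example):

    endpoint = api.update_inference_endpoint(
        "my-endpoint-name",             # placeholder
        env={"LOG_LEVEL": "debug"},
        domain="models.example.com",
        path="/models/gpt2",
        cache_http_responses=True,
        tags=["dev", "text-generation"],
    )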