huggingface-hub 0.30.1__tar.gz → 0.31.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic.
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/PKG-INFO +1 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/setup.py +2 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/__init__.py +1 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_commit_api.py +23 -4
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_inference_endpoints.py +8 -5
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_snapshot_download.py +2 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_space_api.py +0 -5
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_upload_large_folder.py +26 -3
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/upload.py +2 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/constants.py +1 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/file_download.py +58 -10
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hf_api.py +81 -15
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_client.py +105 -150
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/_async_client.py +105 -150
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +2 -3
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/chat_completion.py +3 -3
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_generation.py +1 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/__init__.py +55 -17
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/_common.py +34 -19
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/black_forest_labs.py +4 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/fal_ai.py +36 -11
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/hf_inference.py +33 -11
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/hyperbolic.py +5 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/nebius.py +15 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/novita.py +14 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/openai.py +3 -2
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/replicate.py +22 -3
- huggingface_hub-0.31.0/src/huggingface_hub/inference/_providers/sambanova.py +28 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/together.py +15 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repocard_data.py +24 -4
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_pagination.py +2 -2
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_runtime.py +4 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_xet.py +1 -12
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/PKG-INFO +1 -1
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/requires.txt +4 -1
- huggingface_hub-0.30.1/src/huggingface_hub/inference/_providers/sambanova.py +0 -6
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/LICENSE +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/MANIFEST.in +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/README.md +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/pyproject.toml +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/setup.cfg +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_commit_scheduler.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_local_folder.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_login.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_tensorboard_logger.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_webhooks_payload.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_webhooks_server.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/__init__.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/_cli_utils.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/delete_cache.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/download.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/env.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/huggingface_cli.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/lfs.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/repo_files.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/scan_cache.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/tag.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/upload_large_folder.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/user.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/version.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/community.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/errors.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/fastai_utils.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hf_file_system.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hub_mixin.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/__init__.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_common.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/__init__.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/__init__.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/audio_classification.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/audio_to_audio.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/base.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/depth_estimation.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/document_question_answering.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/feature_extraction.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/fill_mask.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_classification.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_segmentation.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_to_image.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/object_detection.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/question_answering.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/sentence_similarity.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/summarization.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/table_question_answering.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text2text_generation.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_classification.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_image.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_video.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/token_classification.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/translation.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/video_classification.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/visual_question_answering.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_classification.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/cerebras.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/cohere.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/fireworks_ai.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference_api.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/keras_mixin.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/lfs.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/py.typed +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repocard.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repository.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/__init__.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_base.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_dduf.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_tensorflow.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_torch.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/templates/datasetcard_template.md +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/templates/modelcard_template.md +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/__init__.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_auth.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_cache_assets.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_cache_manager.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_chunk_utils.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_datetime.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_deprecation.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_experimental.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_fixes.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_git_credential.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_headers.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_hf_folder.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_http.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_lfs.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_paths.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_safetensors.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_subprocess.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_telemetry.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_typing.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_validators.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/endpoint_helpers.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/insecure_hashlib.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/logging.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/sha.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/tqdm.py +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/SOURCES.txt +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/dependency_links.txt +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/entry_points.txt +0 -0
- {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/top_level.txt +0 -0
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: huggingface_hub
-Version: 0.30.1
+Version: 0.31.0
 Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
 Home-page: https://github.com/huggingface/huggingface_hub
 Author: Hugging Face, Inc.
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/setup.py
RENAMED
@@ -14,6 +14,7 @@ def get_version() -> str:
 install_requires = [
     "filelock",
     "fsspec>=2023.5.0",
+    "hf-xet>=1.1.0,<2.0.0; platform_machine=='x86_64' or platform_machine=='amd64' or platform_machine=='arm64' or platform_machine=='aarch64'",
     "packaging>=20.9",
     "pyyaml>=5.1",
     "requests",
@@ -55,7 +56,7 @@ extras["tensorflow-testing"] = [
     "keras<3.0",
 ]
 
-extras["hf_xet"] = ["hf_xet>=
+extras["hf_xet"] = ["hf_xet>=1.1.0,<2.0.0"]
 
 extras["testing"] = (
     extras["cli"]
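The `hf-xet` backend now ships as a platform-conditional core dependency and the `hf_xet` extra is pinned to `>=1.1.0,<2.0.0`. A minimal sketch of how to check at runtime whether the Xet backend is actually usable, using the `is_xet_available` helper that the CLI imports in the commands/upload.py hunk further down (the print messages are illustrative):

    # Minimal sketch: check whether the optional hf-xet backend is installed.
    from huggingface_hub.utils._runtime import is_xet_available

    if is_xet_available():
        print("hf-xet is installed: transfers can go through Xet storage")
    else:
        print("hf-xet is not installed: falling back to regular HTTP/LFS transfers")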
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_commit_api.py
RENAMED
@@ -530,7 +530,7 @@ def _upload_xet_files(
     if len(additions) == 0:
         return
     # at this point, we know that hf_xet is installed
-    from hf_xet import upload_files
+    from hf_xet import upload_bytes, upload_files
 
     try:
         xet_connection_info = fetch_xet_connection_info_from_repo_info(
@@ -571,8 +571,10 @@ def _upload_xet_files(
     num_chunks_num_digits = int(math.log10(num_chunks)) + 1
     for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
         _chunk = [op for op in chunk]
-
-
+
+        bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
+        paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
+        expected_size = sum(op.upload_info.size for op in bytes_ops + paths_ops)
 
         if num_chunks > 1:
             description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
@@ -592,7 +594,24 @@ def _upload_xet_files(
         def update_progress(increment: int):
             progress.update(increment)
 
-
+        if len(paths_ops) > 0:
+            upload_files(
+                [str(op.path_or_fileobj) for op in paths_ops],
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                update_progress,
+                repo_type,
+            )
+        if len(bytes_ops) > 0:
+            upload_bytes(
+                [op.path_or_fileobj for op in bytes_ops],
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                update_progress,
+                repo_type,
+            )
     return
 
 
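`_upload_xet_files` now splits each batch into in-memory `bytes` payloads and on-disk paths and routes them to `upload_bytes` and `upload_files` respectively. A rough, self-contained illustration of that partitioning step; the `Op` class below is a hypothetical stand-in for the real `CommitOperationAdd`, used only to keep the sketch runnable:

    # Sketch of the dispatch logic: raw bytes go to upload_bytes, file paths to upload_files.
    from dataclasses import dataclass
    from pathlib import Path
    from typing import Union

    @dataclass
    class Op:  # hypothetical stand-in for CommitOperationAdd
        path_or_fileobj: Union[bytes, str, Path]

    ops = [Op(b"raw bytes payload"), Op("local/model.safetensors"), Op(Path("README.md"))]

    bytes_ops = [op for op in ops if isinstance(op.path_or_fileobj, bytes)]
    paths_ops = [op for op in ops if isinstance(op.path_or_fileobj, (str, Path))]

    # bytes_ops -> hf_xet.upload_bytes([op.path_or_fileobj for op in bytes_ops], ...)
    # paths_ops -> hf_xet.upload_files([str(op.path_or_fileobj) for op in paths_ops], ...)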
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_inference_endpoints.py
RENAMED
@@ -6,14 +6,13 @@ from typing import TYPE_CHECKING, Dict, Optional, Union
 
 from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
 
-from .inference._client import InferenceClient
-from .inference._generated._async_client import AsyncInferenceClient
 from .utils import get_session, logging, parse_datetime
 
 
 if TYPE_CHECKING:
     from .hf_api import HfApi
-
+    from .inference._client import InferenceClient
+    from .inference._generated._async_client import AsyncInferenceClient
 
 logger = logging.get_logger(__name__)
 
@@ -138,7 +137,7 @@ class InferenceEndpoint:
         self._populate_from_raw()
 
     @property
-    def client(self) -> InferenceClient:
+    def client(self) -> "InferenceClient":
         """Returns a client to make predictions on this Inference Endpoint.
 
         Returns:
@@ -152,13 +151,15 @@ class InferenceEndpoint:
                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
             )
+        from .inference._client import InferenceClient
+
         return InferenceClient(
             model=self.url,
             token=self._token,  # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok.
         )
 
     @property
-    def async_client(self) -> AsyncInferenceClient:
+    def async_client(self) -> "AsyncInferenceClient":
         """Returns a client to make predictions on this Inference Endpoint.
 
         Returns:
@@ -172,6 +173,8 @@ class InferenceEndpoint:
                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
            )
+        from .inference._generated._async_client import AsyncInferenceClient
+
         return AsyncInferenceClient(
             model=self.url,
             token=self._token,  # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok.
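The module-level imports of `InferenceClient` / `AsyncInferenceClient` move under `TYPE_CHECKING` and are re-imported lazily inside the `client` / `async_client` properties, so importing `_inference_endpoints` no longer pulls in the heavier inference stack and circular imports are avoided. A minimal sketch of the same deferred-import pattern, using standard-library names rather than huggingface_hub code:

    # Generic illustration of the pattern above: annotation-only import behind
    # TYPE_CHECKING, real import deferred until the attribute is first accessed.
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from json import JSONDecoder  # evaluated only by type checkers

    class Holder:
        @property
        def decoder(self) -> "JSONDecoder":
            from json import JSONDecoder  # runtime import happens lazily here
            return JSONDecoder()

    print(type(Holder().decoder).__name__)  # JSONDecoder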
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_snapshot_download.py
RENAMED
@@ -200,12 +200,13 @@ def snapshot_download(
             commit_hash = f.read()
 
     # Try to locate snapshot folder for this commit hash
-    if commit_hash is not None:
+    if commit_hash is not None and local_dir is None:
         snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
         if os.path.exists(snapshot_folder):
             # Snapshot folder exists => let's return it
             # (but we can't check if all the files are actually there)
             return snapshot_folder
+
     # If local_dir is not None, return it if it exists and is not empty
     if local_dir is not None:
         local_dir = Path(local_dir)
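The extra `local_dir is None` guard means a cached commit hash no longer short-circuits the `local_dir` code path: when a local directory is requested, resolution happens against that directory rather than the cache. A hedged usage sketch; the repo id and target directory are illustrative:

    # Illustrative: materialize a snapshot into an explicit local directory.
    from huggingface_hub import snapshot_download

    local_path = snapshot_download(
        repo_id="gpt2",               # example repo
        local_dir="./gpt2-snapshot",  # files land here instead of the cache
    )
    print(local_path)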
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_upload_large_folder.py
RENAMED
@@ -44,6 +44,7 @@ logger = logging.getLogger(__name__)
 WAITING_TIME_IF_NO_TASKS = 10  # seconds
 MAX_NB_REGULAR_FILES_PER_COMMIT = 75
 MAX_NB_LFS_FILES_PER_COMMIT = 150
+COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
 
 
 def upload_large_folder_internal(
@@ -184,6 +185,8 @@ class LargeUploadStatus:
         self.last_commit_attempt: Optional[float] = None
 
         self._started_at = datetime.now()
+        self._chunk_idx: int = 1
+        self._chunk_lock: Lock = Lock()
 
         # Setup queues
         for item in self.items:
@@ -199,6 +202,21 @@ class LargeUploadStatus:
             else:
                 logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")
 
+    def target_chunk(self) -> int:
+        with self._chunk_lock:
+            return COMMIT_SIZE_SCALE[self._chunk_idx]
+
+    def update_chunk(self, success: bool, nb_items: int, duration: float) -> None:
+        with self._chunk_lock:
+            if not success:
+                logger.warning(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.")
+                self._chunk_idx -= 1
+            elif nb_items >= COMMIT_SIZE_SCALE[self._chunk_idx] and duration < 40:
+                logger.info(f"Successfully committed {nb_items} at once. Increasing the limit for next batch.")
+                self._chunk_idx += 1
+
+            self._chunk_idx = max(0, min(self._chunk_idx, len(COMMIT_SIZE_SCALE) - 1))
+
     def current_report(self) -> str:
         """Generate a report of the current status of the large upload."""
         nb_hashed = 0
@@ -351,6 +369,8 @@ def _worker_job(
                 status.nb_workers_preupload_lfs -= 1
 
         elif job == WorkerJob.COMMIT:
+            start_ts = time.time()
+            success = True
             try:
                 _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
             except KeyboardInterrupt:
@@ -360,6 +380,9 @@ def _worker_job(
                 traceback.format_exc()
                 for item in items:
                     status.queue_commit.put(item)
+                success = False
+            duration = time.time() - start_ts
+            status.update_chunk(success, len(items), duration)
             with status.lock:
                 status.last_commit_attempt = time.time()
                 status.nb_workers_commit -= 1
@@ -393,7 +416,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() >= 10:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode (>10 files ready)")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode,
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
     elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
@@ -411,7 +434,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode (no other worker getting upload mode)")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode,
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 7. Preupload LFS file if at least 1 file
     # Skip if hf_transfer is enabled and there is already a worker preuploading LFS
@@ -432,7 +455,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() > 0:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode,
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 10. Commit if at least 1 file and 1 min since last commit attempt
     elif (
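Together, `COMMIT_SIZE_SCALE`, `target_chunk()` and `update_chunk()` implement a simple step-up/step-down heuristic for the number of files per commit: move one step up the scale after a fast, full-size success, one step down after a failure. A self-contained sketch of the same heuristic; the scale and the 40-second threshold mirror the diff, while the `ChunkSizer` class itself is purely illustrative:

    # Illustrative re-implementation of the adaptive commit-batch heuristic above.
    COMMIT_SIZE_SCALE = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]

    class ChunkSizer:
        def __init__(self) -> None:
            self.idx = 1  # start at 50 files per commit, as in the diff

        def target(self) -> int:
            return COMMIT_SIZE_SCALE[self.idx]

        def update(self, success: bool, nb_items: int, duration: float) -> None:
            if not success:
                self.idx -= 1  # commit failed: shrink the next batch
            elif nb_items >= COMMIT_SIZE_SCALE[self.idx] and duration < 40:
                self.idx += 1  # full batch committed quickly: grow the next one
            self.idx = max(0, min(self.idx, len(COMMIT_SIZE_SCALE) - 1))

    sizer = ChunkSizer()
    sizer.update(success=True, nb_items=50, duration=12.0)
    print(sizer.target())  # 75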
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/upload.py
RENAMED
@@ -59,6 +59,7 @@ from huggingface_hub.constants import HF_HUB_ENABLE_HF_TRANSFER
 from huggingface_hub.errors import RevisionNotFoundError
 from huggingface_hub.hf_api import HfApi
 from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
+from huggingface_hub.utils._runtime import is_xet_available
 
 
 logger = logging.get_logger(__name__)
@@ -215,7 +216,7 @@ class UploadCommand(BaseHuggingfaceCLICommand):
         if self.delete is not None and len(self.delete) > 0:
             warnings.warn("Ignoring `--delete` since a single file is uploaded.")
 
-        if not HF_HUB_ENABLE_HF_TRANSFER:
+        if not is_xet_available() and not HF_HUB_ENABLE_HF_TRANSFER:
             logger.info(
                 "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See"
                 " https://huggingface.co/docs/huggingface_hub/hf_transfer for more details."
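The CLI hint above is now emitted only when neither `hf_xet` nor `hf_transfer` is available. If you do want the `hf_transfer` fast path, it has to be installed (`pip install hf_transfer`) and enabled before `huggingface_hub` reads its constants; a minimal, illustrative sketch:

    # Illustrative: opt in to hf_transfer; the flag must be set before importing huggingface_hub.
    import os
    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

    from huggingface_hub import hf_hub_download  # imported after the flag is set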
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/file_download.py
RENAMED
@@ -44,7 +44,6 @@ from .utils import (
     get_graphviz_version,  # noqa: F401 # for backward compatibility
     get_jinja_version,  # noqa: F401 # for backward compatibility
     get_pydot_version,  # noqa: F401 # for backward compatibility
-    get_session,
     get_tf_version,  # noqa: F401 # for backward compatibility
     get_torch_version,  # noqa: F401 # for backward compatibility
     hf_raise_for_status,
@@ -62,7 +61,7 @@ from .utils import (
     tqdm,
     validate_hf_hub_args,
 )
-from .utils._http import _adjust_range_header
+from .utils._http import _adjust_range_header, http_backoff
 from .utils._runtime import _PY_VERSION, is_xet_available  # noqa: F401 # for backward compatibility
 from .utils._typing import HTTP_METHOD_T
 from .utils.sha import sha_fileobj
@@ -268,6 +267,8 @@ def _request_wrapper(
     """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
     `allow_redirection=False`.
 
+    A backoff mechanism retries the HTTP call on 429, 503 and 504 errors.
+
     Args:
         method (`str`):
             HTTP method, such as 'GET' or 'HEAD'.
@@ -305,11 +306,40 @@ def _request_wrapper(
         return response
 
     # Perform request and return if status_code is not in the retry list.
-    response =
+    response = http_backoff(method=method, url=url, **params, retry_on_exceptions=(), retry_on_status_codes=(429,))
     hf_raise_for_status(response)
     return response
 
 
+def _get_file_length_from_http_response(response: requests.Response) -> Optional[int]:
+    """
+    Get the length of the file from the HTTP response headers.
+
+    This function extracts the file size from the HTTP response headers, either from the
+    `Content-Range` or `Content-Length` header, if available (in that order).
+    The HTTP response object containing the headers.
+    `int` or `None`: The length of the file in bytes if the information is available,
+        otherwise `None`.
+
+    Args:
+        response (`requests.Response`):
+            The HTTP response object.
+
+    Returns:
+        `int` or `None`: The length of the file in bytes, or None if not available.
+    """
+
+    content_range = response.headers.get("Content-Range")
+    if content_range is not None:
+        return int(content_range.rsplit("/")[-1])
+
+    content_length = response.headers.get("Content-Length")
+    if content_length is not None:
+        return int(content_length)
+
+    return None
+
+
 def http_get(
     url: str,
     temp_file: BinaryIO,
@@ -352,12 +382,15 @@ def http_get(
         # If the file is already fully downloaded, we don't need to download it again.
         return
 
+    has_custom_range_header = headers is not None and any(h.lower() == "range" for h in headers)
     hf_transfer = None
     if constants.HF_HUB_ENABLE_HF_TRANSFER:
         if resume_size != 0:
             warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
         elif proxies is not None:
             warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
+        elif has_custom_range_header:
+            warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method")
         else:
             try:
                 import hf_transfer  # type: ignore[no-redef]
@@ -372,12 +405,24 @@ def http_get(
     headers = copy.deepcopy(headers) or {}
     if resume_size > 0:
         headers["Range"] = _adjust_range_header(headers.get("Range"), resume_size)
+    elif expected_size and expected_size > constants.MAX_HTTP_DOWNLOAD_SIZE:
+        # Any files over 50GB will not be available through basic http request.
+        # Setting the range header to 0-0 will force the server to return the file size in the Content-Range header.
+        # Since hf_transfer splits the download into chunks, the process will succeed afterwards.
+        if hf_transfer:
+            headers["Range"] = "bytes=0-0"
+        else:
+            raise ValueError(
+                "The file is too large to be downloaded using the regular download method. Use `hf_transfer` or `hf_xet` instead."
+                " Try `pip install hf_transfer` or `pip install hf_xet`."
+            )
 
     r = _request_wrapper(
         method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
     )
+
     hf_raise_for_status(r)
-    content_length = r
+    content_length = _get_file_length_from_http_response(r)
 
     # NOTE: 'total' is the total number of bytes to download, not the number of bytes in the file.
     # If the file is compressed, the number of bytes in the saved file will be higher than 'total'.
@@ -425,7 +470,7 @@ def http_get(
             filename=temp_file.name,
             max_files=constants.HF_TRANSFER_CONCURRENCY,
             chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
-            headers=
+            headers=initial_headers,
             parallel_failures=3,
             max_retries=5,
             **({"callback": progress.update} if supports_callback else {}),
@@ -537,11 +582,11 @@ def xet_get(
 
     """
     try:
-        from hf_xet import
+        from hf_xet import PyXetDownloadInfo, download_files  # type: ignore[no-redef]
     except ImportError:
         raise ValueError(
             "To use optimized download using Xet storage, you need to install the hf_xet package. "
-
+            'Try `pip install "huggingface_hub[hf_xet]"` or `pip install hf_xet`.'
         )
 
     connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
@@ -552,8 +597,10 @@ def xet_get(
             raise ValueError("Failed to refresh token using xet metadata.")
         return connection_info.access_token, connection_info.expiration_unix_epoch
 
-
-
+    xet_download_info = [
+        PyXetDownloadInfo(
+            destination_path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, file_size=expected_size
+        )
     ]
 
     if not displayed_filename:
@@ -578,7 +625,7 @@ def xet_get(
             progress.update(progress_bytes)
 
     download_files(
-
+        xet_download_info,
         endpoint=connection_info.endpoint,
         token_info=(connection_info.access_token, connection_info.expiration_unix_epoch),
         token_refresher=token_refresher,
@@ -1672,6 +1719,7 @@ def _download_to_tmp_and_move(
             "Falling back to regular HTTP download. "
            "For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`"
         )
+
         http_get(
             url_to_download,
             f,
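The new `_get_file_length_from_http_response` helper prefers `Content-Range` (which still carries the full file size when the request was a partial `bytes=0-0` probe) over `Content-Length`. A standalone sketch of the same parsing order on a plain header mapping; the function name here is illustrative, not part of the library API:

    # Standalone sketch of the header-parsing order used by the helper above.
    from typing import Mapping, Optional

    def file_length_from_headers(headers: Mapping[str, str]) -> Optional[int]:
        content_range = headers.get("Content-Range")
        if content_range is not None:
            return int(content_range.rsplit("/")[-1])  # "bytes 0-0/123456789" -> 123456789
        content_length = headers.get("Content-Length")
        if content_length is not None:
            return int(content_length)
        return None

    print(file_length_from_headers({"Content-Range": "bytes 0-0/123456789"}))  # 123456789
    print(file_length_from_headers({"Content-Length": "2048"}))                # 2048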
{huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hf_api.py
RENAMED
@@ -708,14 +708,21 @@ class RepoFolder:
 
 @dataclass
 class InferenceProviderMapping:
+    hf_model_id: str
     status: Literal["live", "staging"]
     provider_id: str
     task: str
 
+    adapter: Optional[str] = None
+    adapter_weights_path: Optional[str] = None
+
     def __init__(self, **kwargs):
+        self.hf_model_id = kwargs.pop("hf_model_id")
         self.status = kwargs.pop("status")
         self.provider_id = kwargs.pop("providerId")
         self.task = kwargs.pop("task")
+        self.adapter = kwargs.pop("adapter", None)
+        self.adapter_weights_path = kwargs.pop("adapterWeightsPath", None)
         self.__dict__.update(**kwargs)
 
 
@@ -847,7 +854,9 @@ class ModelInfo:
         self.inference_provider_mapping = kwargs.pop("inferenceProviderMapping", None)
         if self.inference_provider_mapping:
             self.inference_provider_mapping = {
-                provider: InferenceProviderMapping(
+                provider: InferenceProviderMapping(
+                    **{**value, "hf_model_id": self.id}
+                )  # little hack to simplify Inference Providers logic
                 for provider, value in self.inference_provider_mapping.items()
             }
 
@@ -4466,18 +4475,17 @@ class HfApi:
                 expand="xetEnabled",
                 token=token,
             ).xet_enabled
-
-                isinstance(addition.path_or_fileobj,
-                for addition in new_lfs_additions_to_upload
+            has_buffered_io_data = any(
+                isinstance(addition.path_or_fileobj, io.BufferedIOBase) for addition in new_lfs_additions_to_upload
             )
-            if xet_enabled and not
+            if xet_enabled and not has_buffered_io_data and is_xet_available():
                 logger.info("Uploading files using Xet Storage..")
                 _upload_xet_files(**upload_kwargs, create_pr=create_pr)  # type: ignore [arg-type]
             else:
                 if xet_enabled and is_xet_available():
-                    if
+                    if has_buffered_io_data:
                         logger.warning(
-                            "Uploading files as
+                            "Uploading files as a binary IO buffer is not supported by Xet Storage. "
                             "Falling back to HTTP upload."
                         )
                 _upload_lfs_files(**upload_kwargs, num_threads=num_threads)  # type: ignore [arg-type]
@@ -7564,8 +7572,13 @@ class HfApi:
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
+        env: Optional[Dict[str, str]] = None,
         secrets: Optional[Dict[str, str]] = None,
         type: InferenceEndpointType = InferenceEndpointType.PROTECTED,
+        domain: Optional[str] = None,
+        path: Optional[str] = None,
+        cache_http_responses: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
         namespace: Optional[str] = None,
         token: Union[bool, str, None] = None,
     ) -> InferenceEndpoint:
@@ -7603,10 +7616,20 @@ class HfApi:
             custom_image (`Dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            env (`Dict[str, str]`, *optional*):
+                Non-secret environment variables to inject in the container environment.
             secrets (`Dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
             type ([`InferenceEndpointType]`, *optional*):
                 The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`.
+            domain (`str`, *optional*):
+                The custom domain for the Inference Endpoint deployment, if setup the inference endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
+            path (`str`, *optional*):
+                The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).
+            cache_http_responses (`bool`, *optional*):
+                Whether to cache HTTP responses from the Inference Endpoint. Defaults to `False`.
+            tags (`List[str]`, *optional*):
+                A list of tags to associate with the Inference Endpoint.
             namespace (`str`, *optional*):
                 The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.
             token (Union[bool, str, None], optional):
@@ -7657,17 +7680,18 @@ class HfApi:
         ...     type="protected",
         ...     instance_size="x1",
         ...     instance_type="nvidia-a10g",
+        ...     env={
+        ...         "MAX_BATCH_PREFILL_TOKENS": "2048",
+        ...         "MAX_INPUT_LENGTH": "1024",
+        ...         "MAX_TOTAL_TOKENS": "1512",
+        ...         "MODEL_ID": "/repository"
+        ...     },
         ...     custom_image={
         ...         "health_route": "/health",
-        ...         "env": {
-        ...             "MAX_BATCH_PREFILL_TOKENS": "2048",
-        ...             "MAX_INPUT_LENGTH": "1024",
-        ...             "MAX_TOTAL_TOKENS": "1512",
-        ...             "MODEL_ID": "/repository"
-        ...         },
         ...         "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
         ...     },
         ...     secrets={"MY_SECRET_KEY": "secret_value"},
+        ...     tags=["dev", "text-generation"],
         ... )
 
         ```
@@ -7701,8 +7725,21 @@ class HfApi:
             },
             "type": type,
         }
+        if env:
+            payload["model"]["env"] = env
         if secrets:
             payload["model"]["secrets"] = secrets
+        if domain is not None or path is not None:
+            payload["route"] = {}
+            if domain is not None:
+                payload["route"]["domain"] = domain
+            if path is not None:
+                payload["route"]["path"] = path
+        if cache_http_responses is not None:
+            payload["cacheHttpResponses"] = cache_http_responses
+        if tags is not None:
+            payload["tags"] = tags
+
         response = get_session().post(
             f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
             headers=self._build_hf_headers(token=token),
@@ -7864,15 +7901,21 @@ class HfApi:
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
+        env: Optional[Dict[str, str]] = None,
         secrets: Optional[Dict[str, str]] = None,
+        # Route update
+        domain: Optional[str] = None,
+        path: Optional[str] = None,
         # Other
+        cache_http_responses: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
         namespace: Optional[str] = None,
         token: Union[bool, str, None] = None,
     ) -> InferenceEndpoint:
         """Update an Inference Endpoint.
 
-        This method allows the update of either the compute configuration, the deployed model,
-        optional but at least one must be provided.
+        This method allows the update of either the compute configuration, the deployed model, the route, or any combination.
+        All arguments are optional but at least one must be provided.
 
         For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`].
 
@@ -7904,8 +7947,21 @@ class HfApi:
             custom_image (`Dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            env (`Dict[str, str]`, *optional*):
+                Non-secret environment variables to inject in the container environment
             secrets (`Dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
+
+            domain (`str`, *optional*):
+                The custom domain for the Inference Endpoint deployment, if setup the inference endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
+            path (`str`, *optional*):
+                The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).
+
+            cache_http_responses (`bool`, *optional*):
+                Whether to cache HTTP responses from the Inference Endpoint.
+            tags (`List[str]`, *optional*):
+                A list of tags to associate with the Inference Endpoint.
+
             namespace (`str`, *optional*):
                 The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace.
             token (Union[bool, str, None], optional):
@@ -7943,8 +7999,18 @@ class HfApi:
             payload["model"]["task"] = task
         if custom_image is not None:
             payload["model"]["image"] = {"custom": custom_image}
+        if env is not None:
+            payload["model"]["env"] = env
         if secrets is not None:
             payload["model"]["secrets"] = secrets
+        if domain is not None:
+            payload["route"]["domain"] = domain
+        if path is not None:
+            payload["route"]["path"] = path
+        if cache_http_responses is not None:
+            payload["cacheHttpResponses"] = cache_http_responses
+        if tags is not None:
+            payload["tags"] = tags
 
         response = get_session().put(
             f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}",
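The new `env`, `domain`, `path`, `cache_http_responses` and `tags` parameters surface directly in `create_inference_endpoint` (and, with the same meaning, in `update_inference_endpoint`). A hedged usage example based on the docstring example in this diff; every value below (endpoint name, model, domain, tags) is illustrative and the call requires an authenticated account with access to Inference Endpoints:

    # Illustrative call exercising the parameters added in 0.31.0.
    from huggingface_hub import HfApi

    api = HfApi()
    endpoint = api.create_inference_endpoint(
        "my-endpoint-name",
        repository="gpt2",
        framework="pytorch",
        task="text-generation",
        accelerator="cpu",
        vendor="aws",
        region="us-east-1",
        type="protected",
        instance_size="x2",
        instance_type="intel-icl",
        env={"MAX_TOTAL_TOKENS": "1512"},            # non-secret env vars (new)
        domain="my-new-domain.cool-website.woof",    # optional custom route (new)
        path="/models/gpt2",
        tags=["dev", "text-generation"],             # endpoint tags (new)
    )
    print(endpoint.status)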