huggingface-hub 0.31.0rc0__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +145 -46
- huggingface_hub/_commit_api.py +168 -119
- huggingface_hub/_commit_scheduler.py +15 -15
- huggingface_hub/_inference_endpoints.py +15 -12
- huggingface_hub/_jobs_api.py +301 -0
- huggingface_hub/_local_folder.py +18 -3
- huggingface_hub/_login.py +31 -63
- huggingface_hub/_oauth.py +460 -0
- huggingface_hub/_snapshot_download.py +239 -80
- huggingface_hub/_space_api.py +5 -5
- huggingface_hub/_tensorboard_logger.py +15 -19
- huggingface_hub/_upload_large_folder.py +172 -76
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +13 -25
- huggingface_hub/{commands → cli}/__init__.py +1 -15
- huggingface_hub/cli/_cli_utils.py +173 -0
- huggingface_hub/cli/auth.py +147 -0
- huggingface_hub/cli/cache.py +841 -0
- huggingface_hub/cli/download.py +189 -0
- huggingface_hub/cli/hf.py +60 -0
- huggingface_hub/cli/inference_endpoints.py +377 -0
- huggingface_hub/cli/jobs.py +772 -0
- huggingface_hub/cli/lfs.py +175 -0
- huggingface_hub/cli/repo.py +315 -0
- huggingface_hub/cli/repo_files.py +94 -0
- huggingface_hub/{commands/env.py → cli/system.py} +10 -13
- huggingface_hub/cli/upload.py +294 -0
- huggingface_hub/cli/upload_large_folder.py +117 -0
- huggingface_hub/community.py +20 -12
- huggingface_hub/constants.py +38 -53
- huggingface_hub/dataclasses.py +609 -0
- huggingface_hub/errors.py +80 -30
- huggingface_hub/fastai_utils.py +30 -41
- huggingface_hub/file_download.py +435 -351
- huggingface_hub/hf_api.py +2050 -1124
- huggingface_hub/hf_file_system.py +269 -152
- huggingface_hub/hub_mixin.py +43 -63
- huggingface_hub/inference/_client.py +347 -434
- huggingface_hub/inference/_common.py +133 -121
- huggingface_hub/inference/_generated/_async_client.py +397 -541
- huggingface_hub/inference/_generated/types/__init__.py +5 -1
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +59 -23
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/__init__.py +0 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
- huggingface_hub/inference/_mcp/agent.py +100 -0
- huggingface_hub/inference/_mcp/cli.py +247 -0
- huggingface_hub/inference/_mcp/constants.py +81 -0
- huggingface_hub/inference/_mcp/mcp_client.py +395 -0
- huggingface_hub/inference/_mcp/types.py +45 -0
- huggingface_hub/inference/_mcp/utils.py +128 -0
- huggingface_hub/inference/_providers/__init__.py +82 -7
- huggingface_hub/inference/_providers/_common.py +129 -27
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cerebras.py +1 -1
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/inference/_providers/cohere.py +20 -3
- huggingface_hub/inference/_providers/fal_ai.py +183 -56
- huggingface_hub/inference/_providers/featherless_ai.py +38 -0
- huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
- huggingface_hub/inference/_providers/groq.py +9 -0
- huggingface_hub/inference/_providers/hf_inference.py +69 -30
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +33 -5
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +44 -0
- huggingface_hub/inference/_providers/openai.py +3 -1
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +31 -13
- huggingface_hub/inference/_providers/sambanova.py +18 -4
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/together.py +20 -5
- huggingface_hub/inference/_providers/wavespeed.py +138 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +33 -100
- huggingface_hub/repocard.py +34 -38
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +12 -15
- huggingface_hub/serialization/_dduf.py +8 -8
- huggingface_hub/serialization/_torch.py +69 -69
- huggingface_hub/utils/__init__.py +19 -8
- huggingface_hub/utils/_auth.py +7 -7
- huggingface_hub/utils/_cache_manager.py +92 -147
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +55 -0
- huggingface_hub/utils/_experimental.py +7 -5
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +5 -5
- huggingface_hub/utils/_headers.py +8 -30
- huggingface_hub/utils/_http.py +398 -239
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +61 -24
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +4 -4
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +55 -74
- huggingface_hub/utils/_verification.py +167 -0
- huggingface_hub/utils/_xet.py +64 -17
- huggingface_hub/utils/_xet_progress_reporting.py +162 -0
- huggingface_hub/utils/insecure_hashlib.py +3 -5
- huggingface_hub/utils/logging.py +8 -11
- huggingface_hub/utils/tqdm.py +5 -4
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -85
- huggingface_hub-1.1.3.dist-info/RECORD +155 -0
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
- huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
- huggingface_hub/commands/delete_cache.py +0 -474
- huggingface_hub/commands/download.py +0 -200
- huggingface_hub/commands/huggingface_cli.py +0 -61
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo_files.py +0 -128
- huggingface_hub/commands/scan_cache.py +0 -181
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -314
- huggingface_hub/commands/upload_large_folder.py +0 -129
- huggingface_hub/commands/user.py +0 -304
- huggingface_hub/commands/version.py +0 -37
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.31.0rc0.dist-info/RECORD +0 -135
- huggingface_hub-0.31.0rc0.dist-info/entry_points.txt +0 -6
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0

huggingface_hub/cli/download.py
@@ -0,0 +1,189 @@
# coding=utf-8
# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains command to download files from the Hub with the CLI.

Usage:
    hf download --help

    # Download file
    hf download gpt2 config.json

    # Download entire repo
    hf download fffiloni/zeroscope --repo-type=space --revision=refs/pr/78

    # Download repo with filters
    hf download gpt2 --include="*.safetensors"

    # Download with token
    hf download Wauplin/private-model --token=hf_***

    # Download quietly (no progress bar, no warnings, only the returned path)
    hf download gpt2 config.json --quiet

    # Download to local dir
    hf download gpt2 --local-dir=./models/gpt2
"""

import warnings
from typing import Annotated, Optional, Union

import typer

from huggingface_hub import logging
from huggingface_hub._snapshot_download import snapshot_download
from huggingface_hub.file_download import DryRunFileInfo, hf_hub_download
from huggingface_hub.utils import _format_size, disable_progress_bars, enable_progress_bars, tabulate

from ._cli_utils import RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt


logger = logging.get_logger(__name__)


def download(
    repo_id: RepoIdArg,
    filenames: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Files to download (e.g. `config.json`, `data/metadata.jsonl`).",
        ),
    ] = None,
    repo_type: RepoTypeOpt = RepoTypeOpt.model,
    revision: RevisionOpt = None,
    include: Annotated[
        Optional[list[str]],
        typer.Option(
            help="Glob patterns to include from files to download. e.g.: *.json",
        ),
    ] = None,
    exclude: Annotated[
        Optional[list[str]],
        typer.Option(
            help="Glob patterns to exclude from files to download.",
        ),
    ] = None,
    cache_dir: Annotated[
        Optional[str],
        typer.Option(
            help="Directory where to save files.",
        ),
    ] = None,
    local_dir: Annotated[
        Optional[str],
        typer.Option(
            help="If set, the downloaded file will be placed under this directory. Check out https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-local-folder for more details.",
        ),
    ] = None,
    force_download: Annotated[
        bool,
        typer.Option(
            help="If True, the files will be downloaded even if they are already cached.",
        ),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            help="If True, perform a dry run without actually downloading the file.",
        ),
    ] = False,
    token: TokenOpt = None,
    quiet: Annotated[
        bool,
        typer.Option(
            help="If True, progress bars are disabled and only the path to the downloaded files is printed.",
        ),
    ] = False,
    max_workers: Annotated[
        int,
        typer.Option(
            help="Maximum number of workers to use for downloading files. Default is 8.",
        ),
    ] = 8,
) -> None:
    """Download files from the Hub."""

    def run_download() -> Union[str, DryRunFileInfo, list[DryRunFileInfo]]:
        filenames_list = filenames if filenames is not None else []
        # Warn user if patterns are ignored
        if len(filenames_list) > 0:
            if include is not None and len(include) > 0:
                warnings.warn("Ignoring `--include` since filenames have been explicitly set.")
            if exclude is not None and len(exclude) > 0:
                warnings.warn("Ignoring `--exclude` since filenames have been explicitly set.")

        # Single file to download: use `hf_hub_download`
        if len(filenames_list) == 1:
            return hf_hub_download(
                repo_id=repo_id,
                repo_type=repo_type.value,
                revision=revision,
                filename=filenames_list[0],
                cache_dir=cache_dir,
                force_download=force_download,
                token=token,
                local_dir=local_dir,
                library_name="huggingface-cli",
                dry_run=dry_run,
            )

        # Otherwise: use `snapshot_download` to ensure all files come from the same revision
        if len(filenames_list) == 0:
            allow_patterns = include
            ignore_patterns = exclude
        else:
            allow_patterns = filenames_list
            ignore_patterns = None

        return snapshot_download(
            repo_id=repo_id,
            repo_type=repo_type.value,
            revision=revision,
            allow_patterns=allow_patterns,
            ignore_patterns=ignore_patterns,
            force_download=force_download,
            cache_dir=cache_dir,
            token=token,
            local_dir=local_dir,
            library_name="huggingface-cli",
            max_workers=max_workers,
            dry_run=dry_run,
        )

    def _print_result(result: Union[str, DryRunFileInfo, list[DryRunFileInfo]]) -> None:
        if isinstance(result, str):
            print(result)
            return

        # Print dry run info
        if isinstance(result, DryRunFileInfo):
            result = [result]
        print(
            f"[dry-run] Will download {len([r for r in result if r.will_download])} files (out of {len(result)}) totalling {_format_size(sum(r.file_size for r in result if r.will_download))}."
        )
        columns = ["File", "Bytes to download"]
        items: list[list[Union[str, int]]] = []
        for info in sorted(result, key=lambda x: x.filename):
            items.append([info.filename, _format_size(info.file_size) if info.will_download else "-"])
        print(tabulate(items, headers=columns))

    if quiet:
        disable_progress_bars()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            _print_result(run_download())
        enable_progress_bars()
    else:
        _print_result(run_download())
        logging.set_verbosity_warning()
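
For context, here is a minimal sketch of the two code paths the command above dispatches to, written against the public `huggingface_hub` API; the repo id and glob pattern are illustrative, not defaults:

from huggingface_hub import hf_hub_download, snapshot_download

# Exactly one filename given -> single-file path, as in `hf download gpt2 config.json`
config_path = hf_hub_download(repo_id="gpt2", filename="config.json")

# No filenames given -> snapshot path with optional filters, as in
# `hf download gpt2 --include "*.safetensors"`
snapshot_path = snapshot_download(repo_id="gpt2", allow_patterns=["*.safetensors"])

print(config_path)
print(snapshot_path)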

huggingface_hub/cli/hf.py
@@ -0,0 +1,60 @@
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from huggingface_hub.cli._cli_utils import check_cli_update, typer_factory
from huggingface_hub.cli.auth import auth_cli
from huggingface_hub.cli.cache import cache_cli
from huggingface_hub.cli.download import download
from huggingface_hub.cli.inference_endpoints import ie_cli
from huggingface_hub.cli.jobs import jobs_cli
from huggingface_hub.cli.lfs import lfs_enable_largefiles, lfs_multipart_upload
from huggingface_hub.cli.repo import repo_cli
from huggingface_hub.cli.repo_files import repo_files_cli
from huggingface_hub.cli.system import env, version
from huggingface_hub.cli.upload import upload
from huggingface_hub.cli.upload_large_folder import upload_large_folder
from huggingface_hub.utils import logging


app = typer_factory(help="Hugging Face Hub CLI")


# top level single commands (defined in their respective files)
app.command(help="Download files from the Hub.")(download)
app.command(help="Upload a file or a folder to the Hub.")(upload)
app.command(help="Upload a large folder to the Hub. Recommended for resumable uploads.")(upload_large_folder)
app.command(name="env", help="Print information about the environment.")(env)
app.command(help="Print information about the hf version.")(version)
app.command(help="Configure your repository to enable upload of files > 5GB.", hidden=True)(lfs_enable_largefiles)
app.command(help="Upload large files to the Hub.", hidden=True)(lfs_multipart_upload)


# command groups
app.add_typer(auth_cli, name="auth")
app.add_typer(cache_cli, name="cache")
app.add_typer(repo_cli, name="repo")
app.add_typer(repo_files_cli, name="repo-files")
app.add_typer(jobs_cli, name="jobs")
app.add_typer(ie_cli, name="endpoints")


def main():
    logging.set_verbosity_info()
    check_cli_update()
    app()


if __name__ == "__main__":
    main()
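
A short sketch of exercising this entry point in-process (for example in a smoke test), assuming `typer` is installed and the module path shown above; the two invocations mirror commands registered on the app:

from typer.testing import CliRunner

from huggingface_hub.cli.hf import app

runner = CliRunner()

# Same as running `hf version` from a shell.
result = runner.invoke(app, ["version"])
print(result.output)

# Same as `hf download --help`; a non-zero exit code would indicate a wiring problem.
result = runner.invoke(app, ["download", "--help"])
print(result.exit_code)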

huggingface_hub/cli/inference_endpoints.py
@@ -0,0 +1,377 @@
"""CLI commands for Hugging Face Inference Endpoints."""

import json
from typing import Annotated, Optional

import typer

from huggingface_hub._inference_endpoints import InferenceEndpoint
from huggingface_hub.errors import HfHubHTTPError

from ._cli_utils import TokenOpt, get_hf_api, typer_factory


ie_cli = typer_factory(help="Manage Hugging Face Inference Endpoints.")

catalog_app = typer_factory(help="Interact with the Inference Endpoints catalog.")

NameArg = Annotated[
    str,
    typer.Argument(help="Endpoint name."),
]
NameOpt = Annotated[
    Optional[str],
    typer.Option(help="Endpoint name."),
]

NamespaceOpt = Annotated[
    Optional[str],
    typer.Option(
        help="The namespace associated with the Inference Endpoint. Defaults to the current user's namespace.",
    ),
]


def _print_endpoint(endpoint: InferenceEndpoint) -> None:
    typer.echo(json.dumps(endpoint.raw, indent=2, sort_keys=True))


@ie_cli.command()
def ls(
    namespace: NamespaceOpt = None,
    token: TokenOpt = None,
) -> None:
    """Lists all Inference Endpoints for the given namespace."""
    api = get_hf_api(token=token)
    try:
        endpoints = api.list_inference_endpoints(namespace=namespace, token=token)
    except HfHubHTTPError as error:
        typer.echo(f"Listing failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error

    typer.echo(
        json.dumps(
            {"items": [endpoint.raw for endpoint in endpoints]},
            indent=2,
            sort_keys=True,
        )
    )


@ie_cli.command(name="deploy")
def deploy(
    name: NameArg,
    repo: Annotated[
        str,
        typer.Option(
            help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').",
        ),
    ],
    framework: Annotated[
        str,
        typer.Option(
            help="The machine learning framework used for the model (e.g. 'vllm').",
        ),
    ],
    accelerator: Annotated[
        str,
        typer.Option(
            help="The hardware accelerator to be used for inference (e.g. 'cpu').",
        ),
    ],
    instance_size: Annotated[
        str,
        typer.Option(
            help="The size or type of the instance to be used for hosting the model (e.g. 'x4').",
        ),
    ],
    instance_type: Annotated[
        str,
        typer.Option(
            help="The cloud instance type where the Inference Endpoint will be deployed (e.g. 'intel-icl').",
        ),
    ],
    region: Annotated[
        str,
        typer.Option(
            help="The cloud region in which the Inference Endpoint will be created (e.g. 'us-east-1').",
        ),
    ],
    vendor: Annotated[
        str,
        typer.Option(
            help="The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. 'aws').",
        ),
    ],
    *,
    namespace: NamespaceOpt = None,
    task: Annotated[
        Optional[str],
        typer.Option(
            help="The task on which to deploy the model (e.g. 'text-classification').",
        ),
    ] = None,
    token: TokenOpt = None,
) -> None:
    """Deploy an Inference Endpoint from a Hub repository."""
    api = get_hf_api(token=token)
    endpoint = api.create_inference_endpoint(
        name=name,
        repository=repo,
        framework=framework,
        accelerator=accelerator,
        instance_size=instance_size,
        instance_type=instance_type,
        region=region,
        vendor=vendor,
        namespace=namespace,
        task=task,
        token=token,
    )

    _print_endpoint(endpoint)


@catalog_app.command(name="deploy")
def deploy_from_catalog(
    repo: Annotated[
        str,
        typer.Option(
            help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').",
        ),
    ],
    name: NameOpt = None,
    namespace: NamespaceOpt = None,
    token: TokenOpt = None,
) -> None:
    """Deploy an Inference Endpoint from the Model Catalog."""
    api = get_hf_api(token=token)
    try:
        endpoint = api.create_inference_endpoint_from_catalog(
            repo_id=repo,
            name=name,
            namespace=namespace,
            token=token,
        )
    except HfHubHTTPError as error:
        typer.echo(f"Deployment failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error

    _print_endpoint(endpoint)


def list_catalog(
    token: TokenOpt = None,
) -> None:
    """List available Catalog models."""
    api = get_hf_api(token=token)
    try:
        models = api.list_inference_catalog(token=token)
    except HfHubHTTPError as error:
        typer.echo(f"Catalog fetch failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error

    typer.echo(json.dumps({"models": models}, indent=2, sort_keys=True))


catalog_app.command(name="ls")(list_catalog)
ie_cli.command(name="list-catalog", help="List available Catalog models.", hidden=True)(list_catalog)


ie_cli.add_typer(catalog_app, name="catalog")


@ie_cli.command()
def describe(
    name: NameArg,
    namespace: NamespaceOpt = None,
    token: TokenOpt = None,
) -> None:
    """Get information about an existing endpoint."""
    api = get_hf_api(token=token)
    try:
        endpoint = api.get_inference_endpoint(name=name, namespace=namespace, token=token)
    except HfHubHTTPError as error:
        typer.echo(f"Fetch failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error

    _print_endpoint(endpoint)


@ie_cli.command()
def update(
    name: NameArg,
    namespace: NamespaceOpt = None,
    repo: Annotated[
        Optional[str],
        typer.Option(
            help="The name of the model repository associated with the Inference Endpoint (e.g. 'openai/gpt-oss-120b').",
        ),
    ] = None,
    accelerator: Annotated[
        Optional[str],
        typer.Option(
            help="The hardware accelerator to be used for inference (e.g. 'cpu').",
        ),
    ] = None,
    instance_size: Annotated[
        Optional[str],
        typer.Option(
            help="The size or type of the instance to be used for hosting the model (e.g. 'x4').",
        ),
    ] = None,
    instance_type: Annotated[
        Optional[str],
        typer.Option(
            help="The cloud instance type where the Inference Endpoint will be deployed (e.g. 'intel-icl').",
        ),
    ] = None,
    framework: Annotated[
        Optional[str],
        typer.Option(
            help="The machine learning framework used for the model (e.g. 'custom').",
        ),
    ] = None,
    revision: Annotated[
        Optional[str],
        typer.Option(
            help="The specific model revision to deploy on the Inference Endpoint (e.g. '6c0e6080953db56375760c0471a8c5f2929baf11').",
        ),
    ] = None,
    task: Annotated[
        Optional[str],
        typer.Option(
            help="The task on which to deploy the model (e.g. 'text-classification').",
        ),
    ] = None,
    min_replica: Annotated[
        Optional[int],
        typer.Option(
            help="The minimum number of replicas (instances) to keep running for the Inference Endpoint.",
        ),
    ] = None,
    max_replica: Annotated[
        Optional[int],
        typer.Option(
            help="The maximum number of replicas (instances) to scale to for the Inference Endpoint.",
        ),
    ] = None,
    scale_to_zero_timeout: Annotated[
        Optional[int],
        typer.Option(
            help="The duration in minutes before an inactive endpoint is scaled to zero.",
        ),
    ] = None,
    token: TokenOpt = None,
) -> None:
    """Update an existing endpoint."""
    api = get_hf_api(token=token)
    try:
        endpoint = api.update_inference_endpoint(
            name=name,
            namespace=namespace,
            repository=repo,
            framework=framework,
            revision=revision,
            task=task,
            accelerator=accelerator,
            instance_size=instance_size,
            instance_type=instance_type,
            min_replica=min_replica,
            max_replica=max_replica,
            scale_to_zero_timeout=scale_to_zero_timeout,
            token=token,
        )
    except HfHubHTTPError as error:
        typer.echo(f"Update failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error
    _print_endpoint(endpoint)


@ie_cli.command()
def delete(
    name: NameArg,
    namespace: NamespaceOpt = None,
    yes: Annotated[
        bool,
        typer.Option("--yes", help="Skip confirmation prompts."),
    ] = False,
    token: TokenOpt = None,
) -> None:
    """Delete an Inference Endpoint permanently."""
    if not yes:
        confirmation = typer.prompt(f"Delete endpoint '{name}'? Type the name to confirm.")
        if confirmation != name:
            typer.echo("Aborted.")
            raise typer.Exit(code=2)

    api = get_hf_api(token=token)
    try:
        api.delete_inference_endpoint(name=name, namespace=namespace, token=token)
    except HfHubHTTPError as error:
        typer.echo(f"Delete failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error

    typer.echo(f"Deleted '{name}'.")


@ie_cli.command()
def pause(
    name: NameArg,
    namespace: NamespaceOpt = None,
    token: TokenOpt = None,
) -> None:
    """Pause an Inference Endpoint."""
    api = get_hf_api(token=token)
    try:
        endpoint = api.pause_inference_endpoint(name=name, namespace=namespace, token=token)
    except HfHubHTTPError as error:
        typer.echo(f"Pause failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error

    _print_endpoint(endpoint)


@ie_cli.command()
def resume(
    name: NameArg,
    namespace: NamespaceOpt = None,
    fail_if_already_running: Annotated[
        bool,
        typer.Option(
            "--fail-if-already-running",
            help="If `True`, the method will raise an error if the Inference Endpoint is already running.",
        ),
    ] = False,
    token: TokenOpt = None,
) -> None:
    """Resume an Inference Endpoint."""
    api = get_hf_api(token=token)
    try:
        endpoint = api.resume_inference_endpoint(
            name=name,
            namespace=namespace,
            token=token,
            running_ok=not fail_if_already_running,
        )
    except HfHubHTTPError as error:
        typer.echo(f"Resume failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error
    _print_endpoint(endpoint)


@ie_cli.command()
def scale_to_zero(
    name: NameArg,
    namespace: NamespaceOpt = None,
    token: TokenOpt = None,
) -> None:
    """Scale an Inference Endpoint to zero."""
    api = get_hf_api(token=token)
    try:
        endpoint = api.scale_to_zero_inference_endpoint(name=name, namespace=namespace, token=token)
    except HfHubHTTPError as error:
        typer.echo(f"Scale To Zero failed: {error}")
        raise typer.Exit(code=error.response.status_code) from error

    _print_endpoint(endpoint)
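
For reference, a sketch of the `HfApi` calls these commands wrap; the endpoint name and hardware values below are illustrative placeholders, not defaults:

from huggingface_hub import HfApi

api = HfApi()

# Roughly what `hf endpoints deploy my-endpoint --repo openai/gpt-oss-120b ...` does.
endpoint = api.create_inference_endpoint(
    name="my-endpoint",
    repository="openai/gpt-oss-120b",
    framework="vllm",
    accelerator="gpu",
    instance_size="x1",
    instance_type="nvidia-a10g",
    region="us-east-1",
    vendor="aws",
)

# Roughly what `hf endpoints describe` and `hf endpoints pause` do.
print(api.get_inference_endpoint(name="my-endpoint").raw)
api.pause_inference_endpoint(name="my-endpoint")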