huggingface-hub 0.31.0rc0__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +145 -46
- huggingface_hub/_commit_api.py +168 -119
- huggingface_hub/_commit_scheduler.py +15 -15
- huggingface_hub/_inference_endpoints.py +15 -12
- huggingface_hub/_jobs_api.py +301 -0
- huggingface_hub/_local_folder.py +18 -3
- huggingface_hub/_login.py +31 -63
- huggingface_hub/_oauth.py +460 -0
- huggingface_hub/_snapshot_download.py +239 -80
- huggingface_hub/_space_api.py +5 -5
- huggingface_hub/_tensorboard_logger.py +15 -19
- huggingface_hub/_upload_large_folder.py +172 -76
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +13 -25
- huggingface_hub/{commands → cli}/__init__.py +1 -15
- huggingface_hub/cli/_cli_utils.py +173 -0
- huggingface_hub/cli/auth.py +147 -0
- huggingface_hub/cli/cache.py +841 -0
- huggingface_hub/cli/download.py +189 -0
- huggingface_hub/cli/hf.py +60 -0
- huggingface_hub/cli/inference_endpoints.py +377 -0
- huggingface_hub/cli/jobs.py +772 -0
- huggingface_hub/cli/lfs.py +175 -0
- huggingface_hub/cli/repo.py +315 -0
- huggingface_hub/cli/repo_files.py +94 -0
- huggingface_hub/{commands/env.py → cli/system.py} +10 -13
- huggingface_hub/cli/upload.py +294 -0
- huggingface_hub/cli/upload_large_folder.py +117 -0
- huggingface_hub/community.py +20 -12
- huggingface_hub/constants.py +38 -53
- huggingface_hub/dataclasses.py +609 -0
- huggingface_hub/errors.py +80 -30
- huggingface_hub/fastai_utils.py +30 -41
- huggingface_hub/file_download.py +435 -351
- huggingface_hub/hf_api.py +2050 -1124
- huggingface_hub/hf_file_system.py +269 -152
- huggingface_hub/hub_mixin.py +43 -63
- huggingface_hub/inference/_client.py +347 -434
- huggingface_hub/inference/_common.py +133 -121
- huggingface_hub/inference/_generated/_async_client.py +397 -541
- huggingface_hub/inference/_generated/types/__init__.py +5 -1
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +59 -23
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/__init__.py +0 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
- huggingface_hub/inference/_mcp/agent.py +100 -0
- huggingface_hub/inference/_mcp/cli.py +247 -0
- huggingface_hub/inference/_mcp/constants.py +81 -0
- huggingface_hub/inference/_mcp/mcp_client.py +395 -0
- huggingface_hub/inference/_mcp/types.py +45 -0
- huggingface_hub/inference/_mcp/utils.py +128 -0
- huggingface_hub/inference/_providers/__init__.py +82 -7
- huggingface_hub/inference/_providers/_common.py +129 -27
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cerebras.py +1 -1
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/inference/_providers/cohere.py +20 -3
- huggingface_hub/inference/_providers/fal_ai.py +183 -56
- huggingface_hub/inference/_providers/featherless_ai.py +38 -0
- huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
- huggingface_hub/inference/_providers/groq.py +9 -0
- huggingface_hub/inference/_providers/hf_inference.py +69 -30
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +33 -5
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +44 -0
- huggingface_hub/inference/_providers/openai.py +3 -1
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +31 -13
- huggingface_hub/inference/_providers/sambanova.py +18 -4
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/together.py +20 -5
- huggingface_hub/inference/_providers/wavespeed.py +138 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +33 -100
- huggingface_hub/repocard.py +34 -38
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +12 -15
- huggingface_hub/serialization/_dduf.py +8 -8
- huggingface_hub/serialization/_torch.py +69 -69
- huggingface_hub/utils/__init__.py +19 -8
- huggingface_hub/utils/_auth.py +7 -7
- huggingface_hub/utils/_cache_manager.py +92 -147
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +55 -0
- huggingface_hub/utils/_experimental.py +7 -5
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +5 -5
- huggingface_hub/utils/_headers.py +8 -30
- huggingface_hub/utils/_http.py +398 -239
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +61 -24
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +4 -4
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +55 -74
- huggingface_hub/utils/_verification.py +167 -0
- huggingface_hub/utils/_xet.py +64 -17
- huggingface_hub/utils/_xet_progress_reporting.py +162 -0
- huggingface_hub/utils/insecure_hashlib.py +3 -5
- huggingface_hub/utils/logging.py +8 -11
- huggingface_hub/utils/tqdm.py +5 -4
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -85
- huggingface_hub-1.1.3.dist-info/RECORD +155 -0
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
- huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
- huggingface_hub/commands/delete_cache.py +0 -474
- huggingface_hub/commands/download.py +0 -200
- huggingface_hub/commands/huggingface_cli.py +0 -61
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo_files.py +0 -128
- huggingface_hub/commands/scan_cache.py +0 -181
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -314
- huggingface_hub/commands/upload_large_folder.py +0 -129
- huggingface_hub/commands/user.py +0 -304
- huggingface_hub/commands/version.py +0 -37
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.31.0rc0.dist-info/RECORD +0 -135
- huggingface_hub-0.31.0rc0.dist-info/entry_points.txt +0 -6
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
huggingface_hub/cli/upload.py
ADDED
@@ -0,0 +1,294 @@
+# coding=utf-8
+# Copyright 2023-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains command to upload a repo or file with the CLI.
+
+Usage:
+    # Upload file (implicit)
+    hf upload my-cool-model ./my-cool-model.safetensors
+
+    # Upload file (explicit)
+    hf upload my-cool-model ./my-cool-model.safetensors model.safetensors
+
+    # Upload directory (implicit). If `my-cool-model/` is a directory it will be uploaded, otherwise an exception is raised.
+    hf upload my-cool-model
+
+    # Upload directory (explicit)
+    hf upload my-cool-model ./models/my-cool-model .
+
+    # Upload filtered directory (example: tensorboard logs except for the last run)
+    hf upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"
+
+    # Upload with wildcard
+    hf upload my-cool-model "./model/training/*.safetensors"
+
+    # Upload private dataset
+    hf upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
+
+    # Upload with token
+    hf upload Wauplin/my-cool-model --token=hf_****
+
+    # Sync local Space with Hub (upload new files, delete removed files)
+    hf upload Wauplin/space-example --repo-type=space --exclude="/logs/*" --delete="*" --commit-message="Sync local Space with Hub"
+
+    # Schedule commits every 30 minutes
+    hf upload Wauplin/my-cool-model --every=30
+"""
+
+import os
+import time
+import warnings
+from typing import Annotated, Optional
+
+import typer
+
+from huggingface_hub import logging
+from huggingface_hub._commit_scheduler import CommitScheduler
+from huggingface_hub.errors import RevisionNotFoundError
+from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
+
+from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
+
+
+logger = logging.get_logger(__name__)
+
+
+def upload(
+    repo_id: RepoIdArg,
+    local_path: Annotated[
+        Optional[str],
+        typer.Argument(
+            help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
+        ),
+    ] = None,
+    path_in_repo: Annotated[
+        Optional[str],
+        typer.Argument(
+            help="Path of the file or folder in the repo. Defaults to the relative path of the file or folder.",
+        ),
+    ] = None,
+    repo_type: RepoTypeOpt = RepoType.model,
+    revision: RevisionOpt = None,
+    private: PrivateOpt = False,
+    include: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns to match files to upload.",
+        ),
+    ] = None,
+    exclude: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns to exclude from files to upload.",
+        ),
+    ] = None,
+    delete: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns for file to be deleted from the repo while committing.",
+        ),
+    ] = None,
+    commit_message: Annotated[
+        Optional[str],
+        typer.Option(
+            help="The summary / title / first line of the generated commit.",
+        ),
+    ] = None,
+    commit_description: Annotated[
+        Optional[str],
+        typer.Option(
+            help="The description of the generated commit.",
+        ),
+    ] = None,
+    create_pr: Annotated[
+        bool,
+        typer.Option(
+            help="Whether to upload content as a new Pull Request.",
+        ),
+    ] = False,
+    every: Annotated[
+        Optional[float],
+        typer.Option(
+            help="If set, a background job is scheduled to create commits every `every` minutes.",
+        ),
+    ] = None,
+    token: TokenOpt = None,
+    quiet: Annotated[
+        bool,
+        typer.Option(
+            help="Disable progress bars and warnings; print only the returned path.",
+        ),
+    ] = False,
+) -> None:
+    """Upload a file or a folder to the Hub. Recommended for single-commit uploads."""
+
+    if every is not None and every <= 0:
+        raise typer.BadParameter("--every must be a positive value", param_hint="every")
+
+    repo_type_str = repo_type.value
+
+    api = get_hf_api(token=token)
+
+    # Resolve local_path and path_in_repo based on implicit/explicit rules
+    resolved_local_path, resolved_path_in_repo, resolved_include = _resolve_upload_paths(
+        repo_id=repo_id, local_path=local_path, path_in_repo=path_in_repo, include=include
+    )
+
+    def run_upload() -> str:
+        if os.path.isfile(resolved_local_path):
+            if resolved_include is not None and len(resolved_include) > 0 and isinstance(resolved_include, list):
+                warnings.warn("Ignoring --include since a single file is uploaded.")
+            if exclude is not None and len(exclude) > 0:
+                warnings.warn("Ignoring --exclude since a single file is uploaded.")
+            if delete is not None and len(delete) > 0:
+                warnings.warn("Ignoring --delete since a single file is uploaded.")
+
+        # Schedule commits if `every` is set
+        if every is not None:
+            if os.path.isfile(resolved_local_path):
+                # If file => watch entire folder + use allow_patterns
+                folder_path = os.path.dirname(resolved_local_path)
+                pi = (
+                    resolved_path_in_repo[: -len(resolved_local_path)]
+                    if resolved_path_in_repo.endswith(resolved_local_path)
+                    else resolved_path_in_repo
+                )
+                allow_patterns = [resolved_local_path]
+                ignore_patterns: Optional[list[str]] = []
+            else:
+                folder_path = resolved_local_path
+                pi = resolved_path_in_repo
+                allow_patterns = (
+                    resolved_include or []
+                    if isinstance(resolved_include, list)
+                    else [resolved_include]
+                    if isinstance(resolved_include, str)
+                    else []
+                )
+                ignore_patterns = exclude or []
+                if delete is not None and len(delete) > 0:
+                    warnings.warn("Ignoring --delete when uploading with scheduled commits.")
+
+            scheduler = CommitScheduler(
+                folder_path=folder_path,
+                repo_id=repo_id,
+                repo_type=repo_type_str,
+                revision=revision,
+                allow_patterns=allow_patterns,
+                ignore_patterns=ignore_patterns,
+                path_in_repo=pi,
+                private=private,
+                every=every,
+                hf_api=api,
+            )
+            print(f"Scheduling commits every {every} minutes to {scheduler.repo_id}.")
+            try:
+                while True:
+                    time.sleep(100)
+            except KeyboardInterrupt:
+                scheduler.stop()
+                return "Stopped scheduled commits."
+
+        # Otherwise, create repo and proceed with the upload
+        if not os.path.isfile(resolved_local_path) and not os.path.isdir(resolved_local_path):
+            raise FileNotFoundError(f"No such file or directory: '{resolved_local_path}'.")
+        created = api.create_repo(
+            repo_id=repo_id,
+            repo_type=repo_type_str,
+            exist_ok=True,
+            private=private,
+            space_sdk="gradio" if repo_type_str == "space" else None,
+            # ^ We don't want it to fail when uploading to a Space => let's set Gradio by default.
+            # ^ I'd rather not add CLI args to set it explicitly as we already have `hf repo create` for that.
+        ).repo_id
+
+        # Check if branch already exists and if not, create it
+        if revision is not None and not create_pr:
+            try:
+                api.repo_info(repo_id=created, repo_type=repo_type_str, revision=revision)
+            except RevisionNotFoundError:
+                logger.info(f"Branch '{revision}' not found. Creating it...")
+                api.create_branch(repo_id=created, repo_type=repo_type_str, branch=revision, exist_ok=True)
+                # ^ `exist_ok=True` to avoid race concurrency issues
+
+        # File-based upload
+        if os.path.isfile(resolved_local_path):
+            return api.upload_file(
+                path_or_fileobj=resolved_local_path,
+                path_in_repo=resolved_path_in_repo,
+                repo_id=created,
+                repo_type=repo_type_str,
+                revision=revision,
+                commit_message=commit_message,
+                commit_description=commit_description,
+                create_pr=create_pr,
+            )
+
+        # Folder-based upload
+        return api.upload_folder(
+            folder_path=resolved_local_path,
+            path_in_repo=resolved_path_in_repo,
+            repo_id=created,
+            repo_type=repo_type_str,
+            revision=revision,
+            commit_message=commit_message,
+            commit_description=commit_description,
+            create_pr=create_pr,
+            allow_patterns=(
+                resolved_include
+                if isinstance(resolved_include, list)
+                else [resolved_include]
+                if isinstance(resolved_include, str)
+                else None
+            ),
+            ignore_patterns=exclude,
+            delete_patterns=delete,
+        )
+
+    if quiet:
+        disable_progress_bars()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            print(run_upload())
+        enable_progress_bars()
+    else:
+        print(run_upload())
+        logging.set_verbosity_warning()
+
+
+def _resolve_upload_paths(
+    *, repo_id: str, local_path: Optional[str], path_in_repo: Optional[str], include: Optional[list[str]]
+) -> tuple[str, str, Optional[list[str]]]:
+    repo_name = repo_id.split("/")[-1]
+    resolved_include = include
+
+    if local_path is not None and any(c in local_path for c in ["*", "?", "["]):
+        if include is not None:
+            raise ValueError("Cannot set --include when local_path contains a wildcard.")
+        if path_in_repo is not None and path_in_repo != ".":
+            raise ValueError("Cannot set path_in_repo when local_path contains a wildcard.")
+        return ".", ".", [local_path]  # upload current directory, filtered by the wildcard pattern
+
+    if local_path is None and os.path.isfile(repo_name):
+        return repo_name, repo_name, resolved_include
+    if local_path is None and os.path.isdir(repo_name):
+        return repo_name, ".", resolved_include
+    if local_path is None:
+        raise ValueError(f"'{repo_name}' is not a local file or folder. Please set local_path explicitly.")
+
+    if path_in_repo is None and os.path.isfile(local_path):
+        return local_path, os.path.basename(local_path), resolved_include
+    if path_in_repo is None:
+        return local_path, ".", resolved_include
+    return local_path, path_in_repo, resolved_include
huggingface_hub/cli/upload_large_folder.py
ADDED
@@ -0,0 +1,117 @@
+# coding=utf-8
+# Copyright 2023-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains command to upload a large folder with the CLI."""
+
+import os
+from typing import Annotated, Optional
+
+import typer
+
+from huggingface_hub import logging
+from huggingface_hub.utils import ANSI, disable_progress_bars
+
+from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
+
+
+logger = logging.get_logger(__name__)
+
+
+def upload_large_folder(
+    repo_id: RepoIdArg,
+    local_path: Annotated[
+        str,
+        typer.Argument(
+            help="Local path to the folder to upload.",
+        ),
+    ],
+    repo_type: RepoTypeOpt = RepoType.model,
+    revision: RevisionOpt = None,
+    private: PrivateOpt = False,
+    include: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns to match files to upload.",
+        ),
+    ] = None,
+    exclude: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns to exclude from files to upload.",
+        ),
+    ] = None,
+    token: TokenOpt = None,
+    num_workers: Annotated[
+        Optional[int],
+        typer.Option(
+            help="Number of workers to use to hash, upload and commit files.",
+        ),
+    ] = None,
+    no_report: Annotated[
+        bool,
+        typer.Option(
+            help="Whether to disable regular status report.",
+        ),
+    ] = False,
+    no_bars: Annotated[
+        bool,
+        typer.Option(
+            help="Whether to disable progress bars.",
+        ),
+    ] = False,
+) -> None:
+    """Upload a large folder to the Hub. Recommended for resumable uploads."""
+    if not os.path.isdir(local_path):
+        raise typer.BadParameter("Large upload is only supported for folders.", param_hint="local_path")
+
+    print(
+        ANSI.yellow(
+            "You are about to upload a large folder to the Hub using `hf upload-large-folder`. "
+            "This is a new feature so feedback is very welcome!\n"
+            "\n"
+            "A few things to keep in mind:\n"
+            " - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations\n"
+            " - Do not start several processes in parallel.\n"
+            " - You can interrupt and resume the process at any time. "
+            "The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.\n"
+            " - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.\n"
+            "\n"
+            f"Some temporary metadata will be stored under `{local_path}/.cache/huggingface`.\n"
+            " - You must not modify those files manually.\n"
+            " - You must not delete the `./.cache/huggingface/` folder while a process is running.\n"
+            " - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.\n"
+            "\n"
+            "If the process output is too verbose, you can disable the progress bars with `--no-bars`. "
+            "You can also entirely disable the status report with `--no-report`.\n"
+            "\n"
+            "For more details, run `hf upload-large-folder --help` or check the documentation at "
+            "https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder."
+        )
+    )
+
+    if no_bars:
+        disable_progress_bars()
+
+    api = get_hf_api(token=token)
+    api.upload_large_folder(
+        repo_id=repo_id,
+        folder_path=local_path,
+        repo_type=repo_type.value,
+        revision=revision,
+        private=private,
+        allow_patterns=include,
+        ignore_patterns=exclude,
+        num_workers=num_workers,
+        print_report=not no_report,
+    )
huggingface_hub/community.py
CHANGED
@@ -7,7 +7,7 @@ for more information on Pull Requests, Discussions, and the community tab.
 
 from dataclasses import dataclass
 from datetime import datetime
-from typing import List, Literal, Optional, Union
+from typing import Literal, Optional, TypedDict, Union
 
 from . import constants
 from .utils import parse_datetime
@@ -116,7 +116,7 @@ class DiscussionWithDetails(Discussion):
             The `datetime` of creation of the Discussion / Pull Request.
         events (`list` of [`DiscussionEvent`])
             The list of [`DiscussionEvents`] in this Discussion or Pull Request.
-        conflicting_files (`Union[List[str], bool, None]`, *optional*):
+        conflicting_files (`Union[list[str], bool, None]`, *optional*):
            A list of conflicting files if this is a Pull Request.
            `None` if `self.is_pull_request` is `False`.
            `True` if there are conflicting files but the list can't be retrieved.
@@ -136,13 +136,21 @@ class DiscussionWithDetails(Discussion):
             (property) URL of the discussion on the Hub.
     """
 
-    events: List["DiscussionEvent"]
-    conflicting_files: Union[List[str], bool, None]
+    events: list["DiscussionEvent"]
+    conflicting_files: Union[list[str], bool, None]
     target_branch: Optional[str]
     merge_commit_oid: Optional[str]
     diff: Optional[str]
 
 
+class DiscussionEventArgs(TypedDict):
+    id: str
+    type: str
+    created_at: datetime
+    author: str
+    _event: dict
+
+
 @dataclass
 class DiscussionEvent:
     """
@@ -222,7 +230,7 @@ class DiscussionComment(DiscussionEvent):
         return self._event["data"]["latest"].get("author", {}).get("name", "deleted")
 
     @property
-    def edit_history(self) -> List[dict]:
+    def edit_history(self) -> list[dict]:
         """The edit history of the comment"""
         return self._event["data"]["history"]
 
@@ -319,13 +327,13 @@ def deserialize_event(event: dict) -> DiscussionEvent:
     event_type: str = event["type"]
     created_at = parse_datetime(event["createdAt"])
 
-    common_args = dict(
-        id=event_id,
-        type=event_type,
-        created_at=created_at,
-        author=event.get("author", {}).get("name", "deleted"),
-        _event=event,
-    )
+    common_args: DiscussionEventArgs = {
+        "id": event_id,
+        "type": event_type,
+        "created_at": created_at,
+        "author": event.get("author", {}).get("name", "deleted"),
+        "_event": event,
+    }
 
     if event_type == "comment":
         return DiscussionComment(
huggingface_hub/constants.py
CHANGED
@@ -1,7 +1,7 @@
 import os
 import re
 import typing
-from typing import Literal, Optional, Tuple
+from typing import Literal, Optional
 
 
 # Possible values for env variables
@@ -35,7 +35,6 @@ DEFAULT_ETAG_TIMEOUT = 10
 DEFAULT_DOWNLOAD_TIMEOUT = 10
 DEFAULT_REQUEST_TIMEOUT = 10
 DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
-HF_TRANSFER_CONCURRENCY = 100
 MAX_HTTP_DOWNLOAD_SIZE = 50 * 1000 * 1000 * 1000  # 50 GB
 
 # Constants for serialization
@@ -82,6 +81,17 @@ INFERENCE_ENDPOINT = os.environ.get("HF_INFERENCE_ENDPOINT", "https://api-infere
 INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"
 INFERENCE_CATALOG_ENDPOINT = "https://endpoints.huggingface.co/api/catalog"
 
+# See https://api.endpoints.huggingface.cloud/#post-/v2/endpoint/-namespace-
+INFERENCE_ENDPOINT_IMAGE_KEYS = [
+    "custom",
+    "huggingface",
+    "huggingfaceNeuron",
+    "llamacpp",
+    "tei",
+    "tgi",
+    "tgiNeuron",
+]
+
 # Proxy for third-party providers
 INFERENCE_PROXY_TEMPLATE = "https://router.huggingface.co/{provider}"
 
@@ -107,9 +117,9 @@ REPO_TYPES_MAPPING = {
 }
 
 DiscussionTypeFilter = Literal["all", "discussion", "pull_request"]
-DISCUSSION_TYPES: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter)
+DISCUSSION_TYPES: tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter)
 DiscussionStatusFilter = Literal["all", "open", "closed"]
-DISCUSSION_STATUS: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionStatusFilter)
+DISCUSSION_STATUS: tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionStatusFilter)
 
 # Webhook subscription types
 WEBHOOK_DOMAIN_T = Literal["repo", "discussions"]
@@ -124,7 +134,6 @@ HF_HOME = os.path.expandvars(
         )
     )
 )
-hf_cache_home = HF_HOME  # for backward compatibility. TODO: remove this in 1.0.0
 
 default_cache_path = os.path.join(HF_HOME, "hub")
 default_assets_cache_path = os.path.join(HF_HOME, "assets")
@@ -153,6 +162,10 @@ HF_ASSETS_CACHE = os.path.expandvars(
 
 HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
 
+# File created to mark that the version check has been done.
+# Check is performed once per 24 hours at most.
+CHECK_FOR_UPDATE_DONE_PATH = os.path.join(HF_HOME, ".check_for_update_done")
+
 # If set, log level will be set to DEBUG and all requests made to the Hub will be logged
 # as curl commands for reproducibility.
 HF_DEBUG = _is_true(os.environ.get("HF_DEBUG"))
@@ -201,18 +214,18 @@ HF_HUB_DISABLE_EXPERIMENTAL_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISA
 # Disable sending the cached token by default is all HTTP requests to the Hub
 HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN"))
 
-
-# See:
-# - https://pypi.org/project/hf-transfer/
-# - https://github.com/huggingface/hf_transfer (private)
-HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))
+HF_XET_HIGH_PERFORMANCE: bool = _is_true(os.environ.get("HF_XET_HIGH_PERFORMANCE"))
 
+# hf_transfer is not used anymore. Let's warn user is case they set the env variable
+if _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER")) and not HF_XET_HIGH_PERFORMANCE:
+    import warnings
 
-
-
-
-
-
+    warnings.warn(
+        "The `HF_HUB_ENABLE_HF_TRANSFER` environment variable is deprecated as 'hf_transfer' is not used anymore. "
+        "Please use `HF_XET_HIGH_PERFORMANCE` instead to enable high performance transfer with Xet. "
+        "Visit https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfxethighperformance for more details.",
+        DeprecationWarning,
+    )
 
 # Used to override the etag timeout on a system level
 HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
@@ -220,49 +233,20 @@ HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEF
 # Used to override the get request timeout on a system level
 HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT
 
-# Allows to add information about the requester in the user-agent (
+# Allows to add information about the requester in the user-agent (e.g. partner name)
 HF_HUB_USER_AGENT_ORIGIN: Optional[str] = os.environ.get("HF_HUB_USER_AGENT_ORIGIN")
 
-#
-#
-
-MAIN_INFERENCE_API_FRAMEWORKS = [
-    "diffusers",
-    "sentence-transformers",
-    "text-generation-inference",
-    "transformers",
-]
+# If OAuth didn't work after 2 redirects, there's likely a third-party cookie issue in the Space iframe view.
+# In this case, we redirect the user to the non-iframe view.
+OAUTH_MAX_REDIRECTS = 2
 
-
-
-
-
-
-    "doctr",
-    "espnet",
-    "fairseq",
-    "fastai",
-    "fasttext",
-    "flair",
-    "k2",
-    "keras",
-    "mindspore",
-    "nemo",
-    "open_clip",
-    "paddlenlp",
-    "peft",
-    "pyannote-audio",
-    "sklearn",
-    "spacy",
-    "span-marker",
-    "speechbrain",
-    "stanza",
-    "timm",
-]
+# OAuth-related environment variables injected by the Space
+OAUTH_CLIENT_ID = os.environ.get("OAUTH_CLIENT_ID")
+OAUTH_CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET")
+OAUTH_SCOPES = os.environ.get("OAUTH_SCOPES")
+OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL")
 
 # Xet constants
-
-
 HUGGINGFACE_HEADER_X_XET_ENDPOINT = "X-Xet-Cas-Url"
 HUGGINGFACE_HEADER_X_XET_ACCESS_TOKEN = "X-Xet-Access-Token"
 HUGGINGFACE_HEADER_X_XET_EXPIRATION = "X-Xet-Token-Expiration"
@@ -272,3 +256,4 @@ HUGGINGFACE_HEADER_LINK_XET_AUTH_KEY = "xet-auth"
 
 default_xet_cache_path = os.path.join(HF_HOME, "xet")
 HF_XET_CACHE = os.getenv("HF_XET_CACHE", default_xet_cache_path)
+HF_HUB_DISABLE_XET: bool = _is_true(os.environ.get("HF_HUB_DISABLE_XET"))