huggingface-hub 0.35.0rc0__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
- huggingface_hub/__init__.py +46 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +176 -20
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +10 -14
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +13 -14
- huggingface_hub/_upload_large_folder.py +120 -13
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/_cli_utils.py +2 -2
- huggingface_hub/cli/auth.py +8 -6
- huggingface_hub/cli/cache.py +18 -20
- huggingface_hub/cli/download.py +4 -4
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +599 -22
- huggingface_hub/cli/lfs.py +4 -4
- huggingface_hub/cli/repo.py +11 -7
- huggingface_hub/cli/repo_files.py +2 -2
- huggingface_hub/cli/upload.py +4 -4
- huggingface_hub/cli/upload_large_folder.py +3 -3
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +13 -13
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -19
- huggingface_hub/errors.py +42 -29
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +162 -259
- huggingface_hub/hf_api.py +841 -616
- huggingface_hub/hf_file_system.py +98 -62
- huggingface_hub/hub_mixin.py +37 -57
- huggingface_hub/inference/_client.py +257 -325
- huggingface_hub/inference/_common.py +110 -124
- huggingface_hub/inference/_generated/_async_client.py +307 -432
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +2 -3
- huggingface_hub/inference/_mcp/mcp_client.py +58 -30
- huggingface_hub/inference/_mcp/types.py +10 -7
- huggingface_hub/inference/_mcp/utils.py +11 -7
- huggingface_hub/inference/_providers/__init__.py +4 -2
- huggingface_hub/inference/_providers/_common.py +49 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +52 -21
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +28 -20
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +20 -31
- huggingface_hub/repocard.py +18 -18
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +25 -21
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +4 -4
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +366 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +16 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +32 -11
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
- huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/_tensorboard_logger.py
CHANGED
@@ -14,7 +14,7 @@
 """Contains a logger to push training logs to the Hub, using Tensorboard."""
 
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 from ._commit_scheduler import CommitScheduler
 from .errors import EntryNotFoundError
@@ -26,25 +26,24 @@ from .utils import experimental
 # or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
 # from either of them.
 try:
-    from tensorboardX import SummaryWriter
+    from tensorboardX import SummaryWriter as _RuntimeSummaryWriter
 
     is_summary_writer_available = True
-
 except ImportError:
     try:
-        from torch.utils.tensorboard import SummaryWriter
+        from torch.utils.tensorboard import SummaryWriter as _RuntimeSummaryWriter
 
-        is_summary_writer_available = True
+        is_summary_writer_available = True
     except ImportError:
         # Dummy class to avoid failing at import. Will raise on instance creation.
-
-
+        class _DummySummaryWriter:
+            pass
 
-
-
+        _RuntimeSummaryWriter = _DummySummaryWriter  # type: ignore[assignment]
+        is_summary_writer_available = False
 
 
-class HFSummaryWriter(SummaryWriter):
+class HFSummaryWriter(_RuntimeSummaryWriter):
     """
     Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.
 
@@ -78,10 +77,10 @@ class HFSummaryWriter(SummaryWriter):
             Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
         path_in_repo (`str`, *optional*):
            The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
-        repo_allow_patterns (`List[str]` or `str`, *optional*):
+        repo_allow_patterns (`list[str]` or `str`, *optional*):
            A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
-        repo_ignore_patterns (`List[str]` or `str`, *optional*):
+        repo_ignore_patterns (`list[str]` or `str`, *optional*):
            A list of patterns to exclude in the upload. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
         token (`str`, *optional*):
@@ -138,8 +137,8 @@ class HFSummaryWriter(SummaryWriter):
         repo_revision: Optional[str] = None,
         repo_private: Optional[bool] = None,
         path_in_repo: Optional[str] = "tensorboard",
-        repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
-        repo_ignore_patterns: Optional[Union[List[str], str]] = None,
+        repo_allow_patterns: Optional[Union[list[str], str]] = "*.tfevents.*",
+        repo_ignore_patterns: Optional[Union[list[str], str]] = None,
         token: Optional[str] = None,
         **kwargs,
     ):
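In practice the wrapper is used like any other `SummaryWriter`; a minimal usage sketch (the `repo_id` value is illustrative, and `commit_every` belongs to the existing constructor rather than to these hunks):

    from huggingface_hub import HFSummaryWriter

    # Logs are written locally as usual; the folder is pushed to the Hub on a
    # schedule, keeping only files that match repo_allow_patterns
    # ("*.tfevents.*" by default) under path_in_repo ("tensorboard/").
    logger = HFSummaryWriter(repo_id="user/my-training-logs", commit_every=15)
    logger.add_scalar("train/loss", 0.42, global_step=1)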
huggingface_hub/_upload_large_folder.py
CHANGED
@@ -24,7 +24,7 @@ import traceback
 from datetime import datetime
 from pathlib import Path
 from threading import Lock
-from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 from urllib.parse import quote
 
 from . import constants
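Most hunks in this release are the same mechanical migration: `typing.List`/`typing.Dict`/`typing.Tuple` annotations are replaced by the builtin generics that PEP 585 made subscriptable in Python 3.9. An illustrative helper (not from the library) showing the new style:

    # Builtin containers are directly subscriptable on Python >= 3.9,
    # so the typing.List / typing.Tuple aliases are no longer needed.
    def chunk(items: list[str], size: int) -> list[list[str]]:
        return [items[i : i + size] for i in range(0, len(items), size)]

    print(chunk(["a", "b", "c"], 2))  # [['a', 'b'], ['c']]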
@@ -44,11 +44,113 @@ logger = logging.getLogger(__name__)
 
 WAITING_TIME_IF_NO_TASKS = 10  # seconds
 MAX_NB_FILES_FETCH_UPLOAD_MODE = 100
-COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
+COMMIT_SIZE_SCALE: list[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
 
 UPLOAD_BATCH_SIZE_XET = 256  # Max 256 files per upload batch for XET-enabled repos
 UPLOAD_BATCH_SIZE_LFS = 1  # Otherwise, batches of 1 for regular LFS upload
 
+# Repository limits (from https://huggingface.co/docs/hub/repositories-recommendations)
+MAX_FILES_PER_REPO = 100_000  # Recommended maximum number of files per repository
+MAX_FILES_PER_FOLDER = 10_000  # Recommended maximum number of files per folder
+MAX_FILE_SIZE_GB = 50  # Hard limit for individual file size
+RECOMMENDED_FILE_SIZE_GB = 20  # Recommended maximum for individual file size
+
+
+def _validate_upload_limits(paths_list: list[LocalUploadFilePaths]) -> None:
+    """
+    Validate upload against repository limits and warn about potential issues.
+
+    Args:
+        paths_list: List of file paths to be uploaded
+
+    Warns about:
+        - Too many files in the repository (>100k)
+        - Too many entries (files or subdirectories) in a single folder (>10k)
+        - Files exceeding size limits (>20GB recommended, >50GB hard limit)
+    """
+    logger.info("Running validation checks on files to upload...")
+
+    # Check 1: Total file count
+    if len(paths_list) > MAX_FILES_PER_REPO:
+        logger.warning(
+            f"You are about to upload {len(paths_list):,} files. "
+            f"This exceeds the recommended limit of {MAX_FILES_PER_REPO:,} files per repository.\n"
+            f"Consider:\n"
+            f"  - Splitting your data into multiple repositories\n"
+            f"  - Using fewer, larger files (e.g., parquet files)\n"
+            f"  - See: https://huggingface.co/docs/hub/repositories-recommendations"
+        )
+
+    # Check 2: Files and subdirectories per folder
+    # Track immediate children (files and subdirs) for each folder
+    from collections import defaultdict
+
+    entries_per_folder: dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()})
+
+    for paths in paths_list:
+        path = Path(paths.path_in_repo)
+        parts = path.parts
+
+        # Count this file in its immediate parent directory
+        parent = str(path.parent) if str(path.parent) != "." else "."
+        entries_per_folder[parent]["files"] += 1
+
+        # Track immediate subdirectories for each parent folder
+        # Walk through the path components to track parent-child relationships
+        for i, child in enumerate(parts[:-1]):
+            parent = "." if i == 0 else "/".join(parts[:i])
+            entries_per_folder[parent]["subdirs"].add(child)
+
+    # Check limits for each folder
+    for folder, data in entries_per_folder.items():
+        file_count = data["files"]
+        subdir_count = len(data["subdirs"])
+        total_entries = file_count + subdir_count
+
+        if total_entries > MAX_FILES_PER_FOLDER:
+            folder_display = "root" if folder == "." else folder
+            logger.warning(
+                f"Folder '{folder_display}' contains {total_entries:,} entries "
+                f"({file_count:,} files and {subdir_count:,} subdirectories). "
+                f"This exceeds the recommended {MAX_FILES_PER_FOLDER:,} entries per folder.\n"
+                "Consider reorganising into sub-folders."
+            )
+
+    # Check 3: File sizes
+    large_files = []
+    very_large_files = []
+
+    for paths in paths_list:
+        size = paths.file_path.stat().st_size
+        size_gb = size / 1_000_000_000  # Use decimal GB as per Hub limits
+
+        if size_gb > MAX_FILE_SIZE_GB:
+            very_large_files.append((paths.path_in_repo, size_gb))
+        elif size_gb > RECOMMENDED_FILE_SIZE_GB:
+            large_files.append((paths.path_in_repo, size_gb))
+
+    # Warn about very large files (>50GB)
+    if very_large_files:
+        files_str = "\n  - ".join(f"{path}: {size:.1f}GB" for path, size in very_large_files[:5])
+        more_str = f"\n  ... and {len(very_large_files) - 5} more files" if len(very_large_files) > 5 else ""
+        logger.warning(
+            f"Found {len(very_large_files)} files exceeding the {MAX_FILE_SIZE_GB}GB hard limit:\n"
+            f"  - {files_str}{more_str}\n"
+            f"These files may fail to upload. Consider splitting them into smaller chunks."
+        )
+
+    # Warn about large files (>20GB)
+    if large_files:
+        files_str = "\n  - ".join(f"{path}: {size:.1f}GB" for path, size in large_files[:5])
+        more_str = f"\n  ... and {len(large_files) - 5} more files" if len(large_files) > 5 else ""
+        logger.warning(
+            f"Found {len(large_files)} files larger than {RECOMMENDED_FILE_SIZE_GB}GB (recommended limit):\n"
+            f"  - {files_str}{more_str}\n"
+            f"Large files may slow down loading and processing."
+        )
+
+    logger.info("Validation checks complete.")
+
 
 def upload_large_folder_internal(
     api: "HfApi",
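Note that the per-folder check counts a folder's immediate children only: each file increments its parent's file count, and every ancestor folder records the immediate subdirectory leading toward the file. A standalone sketch of that bookkeeping (paths are illustrative; PurePosixPath is used here for a deterministic result):

    from collections import defaultdict
    from pathlib import PurePosixPath

    paths_in_repo = ["data/train/0.parquet", "data/train/1.parquet", "README.md"]

    entries = defaultdict(lambda: {"files": 0, "subdirs": set()})
    for p in paths_in_repo:
        path = PurePosixPath(p)
        # The file counts toward its immediate parent ("." for the repo root).
        entries[str(path.parent)]["files"] += 1
        # Each ancestor folder records its immediate child directory.
        parts = path.parts
        for i, child in enumerate(parts[:-1]):
            ancestor = "." if i == 0 else "/".join(parts[:i])
            entries[ancestor]["subdirs"].add(child)

    print(dict(entries))
    # {'data/train': {'files': 2, 'subdirs': set()},
    #  '.': {'files': 1, 'subdirs': {'data'}},
    #  'data': {'files': 0, 'subdirs': {'train'}}}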
@@ -58,8 +160,8 @@ def upload_large_folder_internal(
     repo_type: str,  # Repo type is required!
     revision: Optional[str] = None,
     private: Optional[bool] = None,
-    allow_patterns: Optional[Union[List[str], str]] = None,
-    ignore_patterns: Optional[Union[List[str], str]] = None,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
     num_workers: Optional[int] = None,
     print_report: bool = True,
     print_report_every: int = 60,
@@ -118,6 +220,11 @@ def upload_large_folder_internal(
     paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
     logger.info(f"Found {len(paths_list)} candidate files to upload")
 
+    # Validate upload against repository limits
+    _validate_upload_limits(paths_list)
+
+    logger.info("Starting upload...")
+
     # Read metadata for each file
     items = [
         (paths, read_upload_metadata(folder_path, paths.path_in_repo))
@@ -177,13 +284,13 @@ class WorkerJob(enum.Enum):
     WAIT = enum.auto()  # if no tasks are available but we don't want to exit
 
 
-JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
+JOB_ITEM_T = tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
 
 
 class LargeUploadStatus:
     """Contains information, queues and tasks for a large upload process."""
 
-    def __init__(self, items: List[JOB_ITEM_T], upload_batch_size: int = 1):
+    def __init__(self, items: list[JOB_ITEM_T], upload_batch_size: int = 1):
         self.items = items
         self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
         self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
@@ -316,7 +423,7 @@ def _worker_job(
     Read `upload_large_folder` docstring for more information on how tasks are prioritized.
     """
     while True:
-        next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None
+        next_job: Optional[tuple[WorkerJob, list[JOB_ITEM_T]]] = None
 
         # Determine next task
         next_job = _determine_next_job(status)
@@ -409,7 +516,7 @@ def _worker_job(
            status.nb_workers_waiting -= 1
 
 
-def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]:
+def _determine_next_job(status: LargeUploadStatus) -> Optional[tuple[WorkerJob, list[JOB_ITEM_T]]]:
     with status.lock:
         # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file)
         if (
@@ -532,7 +639,7 @@ def _compute_sha256(item: JOB_ITEM_T) -> None:
         metadata.save(paths)
 
 
-def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+def _get_upload_mode(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
     """Get upload mode for each file and update metadata.
 
     Also receive info if the file should be ignored.
@@ -554,7 +661,7 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
         metadata.save(paths)
 
 
-def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+def _preupload_lfs(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
     """Preupload LFS files and update metadata."""
     additions = [_build_hacky_operation(item) for item in items]
     api.preupload_lfs_files(
@@ -569,7 +676,7 @@ def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
         metadata.save(paths)
 
 
-def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+def _commit(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
     """Commit files to the repo."""
     additions = [_build_hacky_operation(item) for item in items]
     api.create_commit(
@@ -614,11 +721,11 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
 ####################
 
 
-def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> list[JOB_ITEM_T]:
     return [queue.get()]
 
 
-def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
+def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> list[JOB_ITEM_T]:
     return [queue.get() for _ in range(min(queue.qsize(), n))]
 
 
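The `_get_n` helper above drains up to `n` items without ever blocking on an empty queue, because the batch is bounded by `qsize()`. A standalone sketch of the pattern:

    import queue

    def get_n(q: "queue.Queue[int]", n: int) -> list[int]:
        # Bounding the batch by qsize() means get() never blocks here.
        return [q.get() for _ in range(min(q.qsize(), n))]

    q: "queue.Queue[int]" = queue.Queue()
    for i in range(5):
        q.put(i)

    print(get_n(q, 3))  # [0, 1, 2]
    print(get_n(q, 3))  # [3, 4]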
huggingface_hub/_webhooks_payload.py
CHANGED
@@ -14,7 +14,7 @@
 # limitations under the License.
 """Contains data structures to parse the webhooks payload."""
 
-from typing import List, Literal, Optional
+from typing import Literal, Optional
 
 from .utils import is_pydantic_available
 
@@ -116,7 +116,7 @@ class WebhookPayloadRepo(ObjectId):
     name: str
     private: bool
     subdomain: Optional[str] = None
-    tags: Optional[List[str]] = None
+    tags: Optional[list[str]] = None
     type: Literal["dataset", "model", "space"]
     url: WebhookPayloadUrl
 
@@ -134,4 +134,4 @@ class WebhookPayload(BaseModel):
     comment: Optional[WebhookPayloadComment] = None
     webhook: WebhookPayloadWebhook
     movedTo: Optional[WebhookPayloadMovedTo] = None
-    updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None
+    updatedRefs: Optional[list[WebhookPayloadUpdatedRef]] = None
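These models are pydantic `BaseModel`s (gated behind `is_pydantic_available`), so an incoming JSON payload is validated into typed attributes. An illustrative stand-in using the field names from the hunk above:

    from typing import Literal, Optional

    from pydantic import BaseModel

    class Repo(BaseModel):  # stand-in for WebhookPayloadRepo
        name: str
        private: bool
        tags: Optional[list[str]] = None
        type: Literal["dataset", "model", "space"]

    repo = Repo(name="user/my-model", private=False, type="model", tags=["demo"])
    print(repo.type)  # model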
huggingface_hub/_webhooks_server.py
CHANGED
@@ -18,7 +18,7 @@ import atexit
 import inspect
 import os
 from functools import wraps
-from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
+from typing import TYPE_CHECKING, Any, Callable, Optional
 
 from .utils import experimental, is_fastapi_available, is_gradio_available
 
@@ -115,7 +115,7 @@ class WebhooksServer:
         self._ui = ui
 
         self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
-        self.registered_webhooks: Dict[str, Callable] = {}
+        self.registered_webhooks: dict[str, Callable] = {}
         _warn_on_empty_secret(self.webhook_secret)
 
     def add_webhook(self, path: Optional[str] = None) -> Callable:
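For context, `registered_webhooks` is populated by the `add_webhook` decorator. Based on the library's documented pattern, a server is wired up roughly like this (secret and route are illustrative; the fastapi/gradio extras must be installed):

    from huggingface_hub import WebhookPayload, WebhooksServer

    app = WebhooksServer(webhook_secret="my-secret")

    @app.add_webhook("/trigger_training")
    async def trigger_training(payload: WebhookPayload):
        # payload arrives already parsed into the pydantic models above
        return {"status": "accepted"}

    app.launch()  # starts the underlying FastAPI server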
huggingface_hub/cli/_cli_utils.py
CHANGED
@@ -14,7 +14,7 @@
 """Contains a utility for good-looking prints."""
 
 import os
-from typing import List, Union
+from typing import Union
 
 
 class ANSI:
@@ -52,7 +52,7 @@ class ANSI:
         return f"{code}{s}{cls._reset}"
 
 
-def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
+def tabulate(rows: list[list[Union[str, int]]], headers: list[str]) -> str:
     """
     Inspired by:
 
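For reference, a minimal re-implementation of what a `tabulate(rows, headers)` of this shape does (a sketch, not the library's exact algorithm): column widths come from the longest cell in each column.

    def tabulate(rows: list[list[object]], headers: list[str]) -> str:
        # Width of each column = longest cell (header included) in that column.
        widths = [max(len(str(cell)) for cell in column) for column in zip(*([headers] + rows))]
        fmt = "  ".join(f"{{:<{w}}}" for w in widths)
        lines = [fmt.format(*headers), fmt.format(*("-" * w for w in widths))]
        lines += [fmt.format(*map(str, row)) for row in rows]
        return "\n".join(lines)

    print(tabulate([["model-a", 3], ["dataset-b", 12]], headers=["repo", "files"]))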
huggingface_hub/cli/auth.py
CHANGED
@@ -31,12 +31,11 @@ Usage:
 """
 
 from argparse import _SubParsersAction
-from typing import List, Optional
-
-from requests.exceptions import HTTPError
+from typing import Optional
 
 from huggingface_hub.commands import BaseHuggingfaceCLICommand
 from huggingface_hub.constants import ENDPOINT
+from huggingface_hub.errors import HfHubHTTPError
 from huggingface_hub.hf_api import HfApi
 
 from .._login import auth_list, auth_switch, login, logout
@@ -62,6 +61,9 @@ class AuthCommands(BaseHuggingfaceCLICommand):
         auth_parser = parser.add_parser("auth", help="Manage authentication (login, logout, etc.).")
         auth_subparsers = auth_parser.add_subparsers(help="Authentication subcommands")
 
+        # Show help if no subcommand is provided
+        auth_parser.set_defaults(func=lambda args: auth_parser.print_help())
+
         # Add 'login' as a subcommand of 'auth'
         login_parser = auth_subparsers.add_parser(
             "login", help="Log in using a token from huggingface.co/settings/tokens"
@@ -170,7 +172,7 @@ class AuthSwitch(BaseAuthCommand):
             except ValueError:
                 print("Invalid input. Please enter a number or 'q' to quit.")
 
-    def _select_token_name_tui(self, token_names: List[str]) -> Optional[str]:
+    def _select_token_name_tui(self, token_names: list[str]) -> Optional[str]:
         choices = [Choice(token_name, name=token_name) for token_name in token_names]
         try:
             return inquirer.select(
@@ -197,14 +199,14 @@ class AuthWhoami(BaseAuthCommand):
             exit()
         try:
             info = self._api.whoami(token)
-            print(info["name"])
+            print(ANSI.bold("user: "), info["name"])
             orgs = [org["name"] for org in info["orgs"]]
             if orgs:
                 print(ANSI.bold("orgs: "), ",".join(orgs))
 
             if ENDPOINT != "https://huggingface.co":
                 print(f"Authenticated through private endpoint: {ENDPOINT}")
-        except HTTPError as e:
+        except HfHubHTTPError as e:
             print(e)
             print(ANSI.red(e.response.text))
             exit(1)
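The `set_defaults` trick generalizes: a parser-level default `func` makes a bare subcommand print its own help instead of falling through. A minimal standalone sketch:

    import argparse

    parser = argparse.ArgumentParser(prog="hf")
    subparsers = parser.add_subparsers()

    auth = subparsers.add_parser("auth")
    auth.set_defaults(func=lambda args: auth.print_help())

    login = auth.add_subparsers().add_parser("login")
    login.set_defaults(func=lambda args: print("logging in..."))

    args = parser.parse_args(["auth"])
    args.func(args)  # prints the auth help instead of crashing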
huggingface_hub/cli/cache.py
CHANGED
@@ -19,15 +19,9 @@ import time
 from argparse import Namespace, _SubParsersAction
 from functools import wraps
 from tempfile import mkstemp
-from typing import Any, Callable, Iterable, List, Literal, Optional, Union
-
-from ..utils import (
-    CachedRepoInfo,
-    CachedRevisionInfo,
-    CacheNotFound,
-    HFCacheInfo,
-    scan_cache_dir,
-)
+from typing import Any, Callable, Iterable, Literal, Optional, Union
+
+from ..utils import CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir
 from . import BaseHuggingfaceCLICommand
 from ._cli_utils import ANSI, tabulate
 
@@ -52,7 +46,7 @@ def require_inquirer_py(fn: Callable) -> Callable:
         if not _inquirer_py_available:
             raise ImportError(
                 "The 'cache delete' command requires extra dependencies for the TUI.\n"
-                "Please run 'pip install huggingface_hub[cli]' to install them.\n"
+                "Please run 'pip install \"huggingface_hub[cli]\"' to install them.\n"
                 "Otherwise, disable TUI using the '--disable-tui' flag."
             )
         return fn(*args, **kwargs)
@@ -65,6 +59,10 @@ class CacheCommand(BaseHuggingfaceCLICommand):
     def register_subcommand(parser: _SubParsersAction):
         cache_parser = parser.add_parser("cache", help="Manage local cache directory.")
         cache_subparsers = cache_parser.add_subparsers(dest="cache_command", help="Cache subcommands")
+
+        # Show help if no subcommand is provided
+        cache_parser.set_defaults(func=lambda args: cache_parser.print_help())
+
         # Scan subcommand
         scan_parser = cache_subparsers.add_parser("scan", help="Scan cache directory.")
         scan_parser.add_argument(
@@ -145,7 +143,7 @@ class CacheCommand(BaseHuggingfaceCLICommand):
             if self.verbosity >= 3:
                 print(ANSI.gray(message))
                 for warning in hf_cache_info.warnings:
-                    print(ANSI.gray(warning))
+                    print(ANSI.gray(str(warning)))
             else:
                 print(ANSI.gray(message + " Use -vvv to print details."))
 
@@ -245,8 +243,8 @@ def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[SortingOption_T]):
 
 @require_inquirer_py
 def _manual_review_tui(
-    hf_cache_info: HFCacheInfo, preselected: List[str], sort_by: Optional[SortingOption_T] = None
-) -> List[str]:
+    hf_cache_info: HFCacheInfo, preselected: list[str], sort_by: Optional[SortingOption_T] = None
+) -> list[str]:
     choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected, sort_by=sort_by)
     checkbox = inquirer.checkbox(
         message="Select revisions to delete:",
@@ -279,9 +277,9 @@ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool:
 
 
 def _get_tui_choices_from_scan(
-    repos: Iterable[CachedRepoInfo], preselected: List[str], sort_by: Optional[SortingOption_T] = None
-) -> List:
-    choices: List[Union["Choice", "Separator"]] = []
+    repos: Iterable[CachedRepoInfo], preselected: list[str], sort_by: Optional[SortingOption_T] = None
+) -> list:
+    choices: list[Union["Choice", "Separator"]] = []
     choices.append(
         Choice(
             _CANCEL_DELETION_STR, name="None of the following (if selected, nothing will be deleted).", enabled=False
@@ -308,8 +306,8 @@ def _get_tui_choices_from_scan(
 
 
 def _manual_review_no_tui(
-    hf_cache_info: HFCacheInfo, preselected: List[str], sort_by: Optional[SortingOption_T] = None
-) -> List[str]:
+    hf_cache_info: HFCacheInfo, preselected: list[str], sort_by: Optional[SortingOption_T] = None
+) -> list[str]:
     fd, tmp_path = mkstemp(suffix=".txt")
     os.close(fd)
     lines = []
@@ -360,14 +358,14 @@ def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool:
             print(f"Invalid input. Must be one of {ALL}")
 
 
-def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: List[str]) -> str:
+def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: list[str]) -> str:
     if _CANCEL_DELETION_STR in selected_hashes:
         return "Nothing will be deleted."
     strategy = hf_cache_info.delete_revisions(*selected_hashes)
     return f"{len(selected_hashes)} revisions selected counting for {strategy.expected_freed_size_str}."
 
 
-def _read_manual_review_tmp_file(tmp_path: str) -> List[str]:
+def _read_manual_review_tmp_file(tmp_path: str) -> list[str]:
     with open(tmp_path) as f:
         content = f.read()
     lines = [line.strip() for line in content.split("\n")]
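The no-TUI flow above round-trips the selection through a temporary file that the user edits by hand; `mkstemp` returns an already-open descriptor that must be closed before the path is reused. A condensed sketch of that round trip (file contents illustrative):

    import os
    from tempfile import mkstemp

    fd, tmp_path = mkstemp(suffix=".txt")
    os.close(fd)  # close the raw fd; the path is reopened below

    with open(tmp_path, "w") as f:
        f.write("# keep the hashes you want to delete\nabc123\n")

    with open(tmp_path) as f:
        lines = [line.strip() for line in f.read().split("\n")]

    os.remove(tmp_path)
    print([line for line in lines if line and not line.startswith("#")])  # ['abc123']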
huggingface_hub/cli/download.py
CHANGED
@@ -38,7 +38,7 @@ Usage:
 
 import warnings
 from argparse import Namespace, _SubParsersAction
-from typing import List, Optional
+from typing import Optional
 
 from huggingface_hub import logging
 from huggingface_hub._snapshot_download import snapshot_download
@@ -113,11 +113,11 @@ class DownloadCommand(BaseHuggingfaceCLICommand):
     def __init__(self, args: Namespace) -> None:
         self.token = args.token
         self.repo_id: str = args.repo_id
-        self.filenames: List[str] = args.filenames
+        self.filenames: list[str] = args.filenames
         self.repo_type: str = args.repo_type
         self.revision: Optional[str] = args.revision
-        self.include: Optional[List[str]] = args.include
-        self.exclude: Optional[List[str]] = args.exclude
+        self.include: Optional[list[str]] = args.include
+        self.exclude: Optional[list[str]] = args.exclude
         self.cache_dir: Optional[str] = args.cache_dir
         self.local_dir: Optional[str] = args.local_dir
         self.force_download: bool = args.force_download
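The `--include`/`--exclude` flags stored here are ultimately forwarded to `snapshot_download`'s pattern filters; a hedged sketch of the equivalent library call (repo_id illustrative):

    from huggingface_hub import snapshot_download

    local_path = snapshot_download(
        repo_id="user/my-model",
        allow_patterns=["*.safetensors", "*.json"],  # like --include
        ignore_patterns=["*.bin"],  # like --exclude
    )
    print(local_path)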
huggingface_hub/cli/hf.py
CHANGED
@@ -47,10 +47,6 @@ def main():
     # LFS commands (hidden in --help)
     LfsCommands.register_subcommand(commands_parser)
 
-    # Legacy commands
-
-    # Experimental
-
     # Let's go
     args = parser.parse_args()
     if not hasattr(args, "func"):
@@ -59,7 +55,8 @@ def main():
 
     # Run
     service = args.func(args)
-    service.run()
+    if service is not None:
+        service.run()
 
 
 if __name__ == "__main__":
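The new `None` check matters because the help-printing defaults added in auth.py and cache.py return `None` rather than a command object, so an unconditional `service.run()` would raise `AttributeError`. A standalone sketch of the dispatch contract (names illustrative):

    class DownloadCommand:
        def run(self) -> None:
            print("downloading...")

    def make_download(args) -> DownloadCommand:
        return DownloadCommand()

    def print_help(args) -> None:  # the kind of fallback installed via set_defaults
        print("usage: hf <command> ...")

    for func in (make_download, print_help):
        service = func(None)
        if service is not None:  # help fallbacks return None; guard before .run()
            service.run()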