huggingface-hub 0.29.3rc0__py3-none-any.whl → 0.30.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +16 -1
- huggingface_hub/_commit_api.py +142 -4
- huggingface_hub/_space_api.py +15 -2
- huggingface_hub/_webhooks_server.py +2 -0
- huggingface_hub/commands/delete_cache.py +66 -20
- huggingface_hub/commands/upload.py +16 -2
- huggingface_hub/constants.py +44 -7
- huggingface_hub/errors.py +19 -0
- huggingface_hub/file_download.py +163 -35
- huggingface_hub/hf_api.py +349 -28
- huggingface_hub/hub_mixin.py +19 -4
- huggingface_hub/inference/_client.py +50 -69
- huggingface_hub/inference/_generated/_async_client.py +57 -76
- huggingface_hub/inference/_generated/types/__init__.py +1 -0
- huggingface_hub/inference/_generated/types/chat_completion.py +20 -10
- huggingface_hub/inference/_generated/types/image_to_image.py +2 -0
- huggingface_hub/inference/_providers/__init__.py +7 -1
- huggingface_hub/inference/_providers/_common.py +9 -5
- huggingface_hub/inference/_providers/black_forest_labs.py +5 -5
- huggingface_hub/inference/_providers/cohere.py +1 -1
- huggingface_hub/inference/_providers/fal_ai.py +64 -7
- huggingface_hub/inference/_providers/fireworks_ai.py +4 -1
- huggingface_hub/inference/_providers/hf_inference.py +41 -4
- huggingface_hub/inference/_providers/hyperbolic.py +3 -3
- huggingface_hub/inference/_providers/nebius.py +3 -3
- huggingface_hub/inference/_providers/novita.py +35 -5
- huggingface_hub/inference/_providers/openai.py +22 -0
- huggingface_hub/inference/_providers/replicate.py +3 -3
- huggingface_hub/inference/_providers/together.py +3 -3
- huggingface_hub/utils/__init__.py +8 -0
- huggingface_hub/utils/_http.py +4 -1
- huggingface_hub/utils/_runtime.py +11 -0
- huggingface_hub/utils/_xet.py +199 -0
- huggingface_hub/utils/tqdm.py +30 -2
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/METADATA +3 -1
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/RECORD +40 -38
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/top_level.txt +0 -0
huggingface_hub/__init__.py
CHANGED
|
@@ -46,7 +46,7 @@ import sys
|
|
|
46
46
|
from typing import TYPE_CHECKING
|
|
47
47
|
|
|
48
48
|
|
|
49
|
-
__version__ = "0.29.3rc0"
|
|
49
|
+
__version__ = "0.30.0rc1"
|
|
50
50
|
|
|
51
51
|
# Alphabetical order of definitions is ensured in tests
|
|
52
52
|
# WARNING: any comment added in this dictionary definition will be lost when
|
|
@@ -165,6 +165,7 @@ _SUBMOD_ATTRS = {
|
|
|
165
165
|
"create_commit",
|
|
166
166
|
"create_discussion",
|
|
167
167
|
"create_inference_endpoint",
|
|
168
|
+
"create_inference_endpoint_from_catalog",
|
|
168
169
|
"create_pull_request",
|
|
169
170
|
"create_repo",
|
|
170
171
|
"create_tag",
|
|
@@ -205,7 +206,9 @@ _SUBMOD_ATTRS = {
|
|
|
205
206
|
"list_accepted_access_requests",
|
|
206
207
|
"list_collections",
|
|
207
208
|
"list_datasets",
|
|
209
|
+
"list_inference_catalog",
|
|
208
210
|
"list_inference_endpoints",
|
|
211
|
+
"list_lfs_files",
|
|
209
212
|
"list_liked_repos",
|
|
210
213
|
"list_models",
|
|
211
214
|
"list_organization_members",
|
|
@@ -228,6 +231,7 @@ _SUBMOD_ATTRS = {
|
|
|
228
231
|
"parse_safetensors_file_metadata",
|
|
229
232
|
"pause_inference_endpoint",
|
|
230
233
|
"pause_space",
|
|
234
|
+
"permanently_delete_lfs_files",
|
|
231
235
|
"preupload_lfs_files",
|
|
232
236
|
"reject_access_request",
|
|
233
237
|
"rename_discussion",
|
|
@@ -296,6 +300,7 @@ _SUBMOD_ATTRS = {
|
|
|
296
300
|
"ChatCompletionInputMessageChunkType",
|
|
297
301
|
"ChatCompletionInputStreamOptions",
|
|
298
302
|
"ChatCompletionInputTool",
|
|
303
|
+
"ChatCompletionInputToolCall",
|
|
299
304
|
"ChatCompletionInputToolChoiceClass",
|
|
300
305
|
"ChatCompletionInputToolChoiceEnum",
|
|
301
306
|
"ChatCompletionInputURL",
|
|
@@ -536,6 +541,7 @@ __all__ = [
|
|
|
536
541
|
"ChatCompletionInputMessageChunkType",
|
|
537
542
|
"ChatCompletionInputStreamOptions",
|
|
538
543
|
"ChatCompletionInputTool",
|
|
544
|
+
"ChatCompletionInputToolCall",
|
|
539
545
|
"ChatCompletionInputToolChoiceClass",
|
|
540
546
|
"ChatCompletionInputToolChoiceEnum",
|
|
541
547
|
"ChatCompletionInputURL",
|
|
@@ -769,6 +775,7 @@ __all__ = [
|
|
|
769
775
|
"create_commit",
|
|
770
776
|
"create_discussion",
|
|
771
777
|
"create_inference_endpoint",
|
|
778
|
+
"create_inference_endpoint_from_catalog",
|
|
772
779
|
"create_pull_request",
|
|
773
780
|
"create_repo",
|
|
774
781
|
"create_tag",
|
|
@@ -823,7 +830,9 @@ __all__ = [
|
|
|
823
830
|
"list_accepted_access_requests",
|
|
824
831
|
"list_collections",
|
|
825
832
|
"list_datasets",
|
|
833
|
+
"list_inference_catalog",
|
|
826
834
|
"list_inference_endpoints",
|
|
835
|
+
"list_lfs_files",
|
|
827
836
|
"list_liked_repos",
|
|
828
837
|
"list_models",
|
|
829
838
|
"list_organization_members",
|
|
@@ -856,6 +865,7 @@ __all__ = [
|
|
|
856
865
|
"parse_safetensors_file_metadata",
|
|
857
866
|
"pause_inference_endpoint",
|
|
858
867
|
"pause_space",
|
|
868
|
+
"permanently_delete_lfs_files",
|
|
859
869
|
"preupload_lfs_files",
|
|
860
870
|
"push_to_hub_fastai",
|
|
861
871
|
"push_to_hub_keras",
|
|
@@ -1107,6 +1117,7 @@ if TYPE_CHECKING: # pragma: no cover
|
|
|
1107
1117
|
create_commit, # noqa: F401
|
|
1108
1118
|
create_discussion, # noqa: F401
|
|
1109
1119
|
create_inference_endpoint, # noqa: F401
|
|
1120
|
+
create_inference_endpoint_from_catalog, # noqa: F401
|
|
1110
1121
|
create_pull_request, # noqa: F401
|
|
1111
1122
|
create_repo, # noqa: F401
|
|
1112
1123
|
create_tag, # noqa: F401
|
|
@@ -1147,7 +1158,9 @@ if TYPE_CHECKING: # pragma: no cover
|
|
|
1147
1158
|
list_accepted_access_requests, # noqa: F401
|
|
1148
1159
|
list_collections, # noqa: F401
|
|
1149
1160
|
list_datasets, # noqa: F401
|
|
1161
|
+
list_inference_catalog, # noqa: F401
|
|
1150
1162
|
list_inference_endpoints, # noqa: F401
|
|
1163
|
+
list_lfs_files, # noqa: F401
|
|
1151
1164
|
list_liked_repos, # noqa: F401
|
|
1152
1165
|
list_models, # noqa: F401
|
|
1153
1166
|
list_organization_members, # noqa: F401
|
|
@@ -1170,6 +1183,7 @@ if TYPE_CHECKING: # pragma: no cover
|
|
|
1170
1183
|
parse_safetensors_file_metadata, # noqa: F401
|
|
1171
1184
|
pause_inference_endpoint, # noqa: F401
|
|
1172
1185
|
pause_space, # noqa: F401
|
|
1186
|
+
permanently_delete_lfs_files, # noqa: F401
|
|
1173
1187
|
preupload_lfs_files, # noqa: F401
|
|
1174
1188
|
reject_access_request, # noqa: F401
|
|
1175
1189
|
rename_discussion, # noqa: F401
|
|
@@ -1236,6 +1250,7 @@ if TYPE_CHECKING: # pragma: no cover
|
|
|
1236
1250
|
ChatCompletionInputMessageChunkType, # noqa: F401
|
|
1237
1251
|
ChatCompletionInputStreamOptions, # noqa: F401
|
|
1238
1252
|
ChatCompletionInputTool, # noqa: F401
|
|
1253
|
+
ChatCompletionInputToolCall, # noqa: F401
|
|
1239
1254
|
ChatCompletionInputToolChoiceClass, # noqa: F401
|
|
1240
1255
|
ChatCompletionInputToolChoiceEnum, # noqa: F401
|
|
1241
1256
|
ChatCompletionInputURL, # noqa: F401
|
huggingface_hub/_commit_api.py
CHANGED
|
@@ -4,6 +4,7 @@ Type definitions and utilities for the `create_commit` API
|
|
|
4
4
|
|
|
5
5
|
import base64
|
|
6
6
|
import io
|
|
7
|
+
import math
|
|
7
8
|
import os
|
|
8
9
|
import warnings
|
|
9
10
|
from collections import defaultdict
|
|
@@ -16,12 +17,14 @@ from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List,
|
|
|
16
17
|
from tqdm.contrib.concurrent import thread_map
|
|
17
18
|
|
|
18
19
|
from . import constants
|
|
19
|
-
from .errors import EntryNotFoundError
|
|
20
|
+
from .errors import EntryNotFoundError, HfHubHTTPError, XetAuthorizationError, XetRefreshTokenError
|
|
20
21
|
from .file_download import hf_hub_url
|
|
21
22
|
from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
|
|
22
23
|
from .utils import (
|
|
23
24
|
FORBIDDEN_FOLDERS,
|
|
25
|
+
XetTokenType,
|
|
24
26
|
chunk_iterable,
|
|
27
|
+
fetch_xet_connection_info_from_repo_info,
|
|
25
28
|
get_session,
|
|
26
29
|
hf_raise_for_status,
|
|
27
30
|
logging,
|
|
@@ -30,6 +33,7 @@ from .utils import (
|
|
|
30
33
|
validate_hf_hub_args,
|
|
31
34
|
)
|
|
32
35
|
from .utils import tqdm as hf_tqdm
|
|
36
|
+
from .utils.tqdm import _get_progress_bar_context
|
|
33
37
|
|
|
34
38
|
|
|
35
39
|
if TYPE_CHECKING:
|
|
@@ -47,6 +51,8 @@ UploadMode = Literal["lfs", "regular"]
|
|
|
47
51
|
# See https://github.com/huggingface/huggingface_hub/issues/1503
|
|
48
52
|
FETCH_LFS_BATCH_SIZE = 500
|
|
49
53
|
|
|
54
|
+
UPLOAD_BATCH_MAX_NUM_FILES = 256
|
|
55
|
+
|
|
50
56
|
|
|
51
57
|
@dataclass
|
|
52
58
|
class CommitOperationDelete:
|
|
@@ -391,7 +397,7 @@ def _upload_lfs_files(
|
|
|
391
397
|
# Upload instructions are retrieved by chunk of 256 files to avoid reaching
|
|
392
398
|
# the payload limit.
|
|
393
399
|
batch_actions: List[Dict] = []
|
|
394
|
-
for chunk in chunk_iterable(additions, chunk_size=256):
|
|
400
|
+
for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
|
|
395
401
|
batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
|
|
396
402
|
upload_infos=[op.upload_info for op in chunk],
|
|
397
403
|
repo_id=repo_id,
|
|
@@ -458,6 +464,138 @@ def _upload_lfs_files(
|
|
|
458
464
|
)
|
|
459
465
|
|
|
460
466
|
|
|
467
|
+
@validate_hf_hub_args
|
|
468
|
+
def _upload_xet_files(
|
|
469
|
+
*,
|
|
470
|
+
additions: List[CommitOperationAdd],
|
|
471
|
+
repo_type: str,
|
|
472
|
+
repo_id: str,
|
|
473
|
+
headers: Dict[str, str],
|
|
474
|
+
endpoint: Optional[str] = None,
|
|
475
|
+
revision: Optional[str] = None,
|
|
476
|
+
create_pr: Optional[bool] = None,
|
|
477
|
+
):
|
|
478
|
+
"""
|
|
479
|
+
Uploads the content of `additions` to the Hub using the xet storage protocol.
|
|
480
|
+
This chunks the files and deduplicates the chunks before uploading them to xetcas storage.
|
|
481
|
+
|
|
482
|
+
Args:
|
|
483
|
+
additions (`List` of `CommitOperationAdd`):
|
|
484
|
+
The files to be uploaded.
|
|
485
|
+
repo_type (`str`):
|
|
486
|
+
Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
|
|
487
|
+
repo_id (`str`):
|
|
488
|
+
A namespace (user or an organization) and a repo name separated
|
|
489
|
+
by a `/`.
|
|
490
|
+
headers (`Dict[str, str]`):
|
|
491
|
+
Headers to use for the request, including authorization headers and user agent.
|
|
492
|
+
endpoint: (`str`, *optional*):
|
|
493
|
+
The endpoint to use for the xetcas service. Defaults to `constants.ENDPOINT`.
|
|
494
|
+
revision (`str`, *optional*):
|
|
495
|
+
The git revision to upload to.
|
|
496
|
+
create_pr (`bool`, *optional*):
|
|
497
|
+
Whether or not to create a Pull Request with that commit.
|
|
498
|
+
|
|
499
|
+
Raises:
|
|
500
|
+
[`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
|
|
501
|
+
If an upload failed for any reason.
|
|
502
|
+
[`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
|
503
|
+
If the server returns malformed responses or if the user is unauthorized to upload to xet storage.
|
|
504
|
+
[`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
|
|
505
|
+
If the LFS batch endpoint returned an HTTP error.
|
|
506
|
+
|
|
507
|
+
**How it works:**
|
|
508
|
+
The file download system uses Xet storage, which is a content-addressable storage system that breaks files into chunks
|
|
509
|
+
for efficient storage and transfer.
|
|
510
|
+
|
|
511
|
+
`hf_xet.upload_files` manages uploading files by:
|
|
512
|
+
- Taking a list of file paths to upload
|
|
513
|
+
- Breaking files into smaller chunks for efficient storage
|
|
514
|
+
- Avoiding duplicate storage by recognizing identical chunks across files
|
|
515
|
+
- Connecting to a storage server (CAS server) that manages these chunks
|
|
516
|
+
|
|
517
|
+
The upload process works like this:
|
|
518
|
+
1. Create a local folder at ~/.cache/huggingface/xet/chunk-cache to store file chunks for reuse.
|
|
519
|
+
2. Process files in parallel (up to 8 files at once):
|
|
520
|
+
2.1. Read the file content.
|
|
521
|
+
2.2. Split the file content into smaller chunks based on content patterns: each chunk gets a unique ID based on what's in it.
|
|
522
|
+
2.3. For each chunk:
|
|
523
|
+
- Check if it already exists in storage.
|
|
524
|
+
- Skip uploading chunks that already exist.
|
|
525
|
+
2.4. Group chunks into larger blocks for efficient transfer.
|
|
526
|
+
2.5. Upload these blocks to the storage server.
|
|
527
|
+
2.6. Create and upload information about how the file is structured.
|
|
528
|
+
3. Return reference files that contain information about the uploaded files, which can be used later to download them.
|
|
529
|
+
"""
|
|
530
|
+
if len(additions) == 0:
|
|
531
|
+
return
|
|
532
|
+
# at this point, we know that hf_xet is installed
|
|
533
|
+
from hf_xet import upload_files
|
|
534
|
+
|
|
535
|
+
try:
|
|
536
|
+
xet_connection_info = fetch_xet_connection_info_from_repo_info(
|
|
537
|
+
token_type=XetTokenType.WRITE,
|
|
538
|
+
repo_id=repo_id,
|
|
539
|
+
repo_type=repo_type,
|
|
540
|
+
revision=revision,
|
|
541
|
+
headers=headers,
|
|
542
|
+
endpoint=endpoint,
|
|
543
|
+
params={"create_pr": "1"} if create_pr else None,
|
|
544
|
+
)
|
|
545
|
+
except HfHubHTTPError as e:
|
|
546
|
+
if e.response.status_code == 401:
|
|
547
|
+
raise XetAuthorizationError(
|
|
548
|
+
f"You are unauthorized to upload to xet storage for {repo_type}/{repo_id}. "
|
|
549
|
+
f"Please check that you have configured your access token with write access to the repo."
|
|
550
|
+
) from e
|
|
551
|
+
raise
|
|
552
|
+
|
|
553
|
+
xet_endpoint = xet_connection_info.endpoint
|
|
554
|
+
access_token_info = (xet_connection_info.access_token, xet_connection_info.expiration_unix_epoch)
|
|
555
|
+
|
|
556
|
+
def token_refresher() -> Tuple[str, int]:
|
|
557
|
+
new_xet_connection = fetch_xet_connection_info_from_repo_info(
|
|
558
|
+
token_type=XetTokenType.WRITE,
|
|
559
|
+
repo_id=repo_id,
|
|
560
|
+
repo_type=repo_type,
|
|
561
|
+
revision=revision,
|
|
562
|
+
headers=headers,
|
|
563
|
+
endpoint=endpoint,
|
|
564
|
+
params={"create_pr": "1"} if create_pr else None,
|
|
565
|
+
)
|
|
566
|
+
if new_xet_connection is None:
|
|
567
|
+
raise XetRefreshTokenError("Failed to refresh xet token")
|
|
568
|
+
return new_xet_connection.access_token, new_xet_connection.expiration_unix_epoch
|
|
569
|
+
|
|
570
|
+
num_chunks = math.ceil(len(additions) / UPLOAD_BATCH_MAX_NUM_FILES)
|
|
571
|
+
num_chunks_num_digits = int(math.log10(num_chunks)) + 1
|
|
572
|
+
for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
|
|
573
|
+
_chunk = [op for op in chunk]
|
|
574
|
+
paths = [str(op.path_or_fileobj) for op in _chunk]
|
|
575
|
+
expected_size = sum([os.path.getsize(path) for path in paths])
|
|
576
|
+
|
|
577
|
+
if num_chunks > 1:
|
|
578
|
+
description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
|
|
579
|
+
else:
|
|
580
|
+
description = "Uploading..."
|
|
581
|
+
progress_cm = _get_progress_bar_context(
|
|
582
|
+
desc=description,
|
|
583
|
+
total=expected_size,
|
|
584
|
+
initial=0,
|
|
585
|
+
unit="B",
|
|
586
|
+
unit_scale=True,
|
|
587
|
+
name="huggingface_hub.xet_put",
|
|
588
|
+
log_level=logger.getEffectiveLevel(),
|
|
589
|
+
)
|
|
590
|
+
with progress_cm as progress:
|
|
591
|
+
|
|
592
|
+
def update_progress(increment: int):
|
|
593
|
+
progress.update(increment)
|
|
594
|
+
|
|
595
|
+
upload_files(paths, xet_endpoint, access_token_info, token_refresher, update_progress, repo_type)
|
|
596
|
+
return
|
|
597
|
+
|
|
598
|
+
|
|
461
599
|
def _validate_preupload_info(preupload_info: dict):
|
|
462
600
|
files = preupload_info.get("files")
|
|
463
601
|
if not isinstance(files, list):
|
|
@@ -485,8 +623,8 @@ def _fetch_upload_modes(
|
|
|
485
623
|
gitignore_content: Optional[str] = None,
|
|
486
624
|
) -> None:
|
|
487
625
|
"""
|
|
488
|
-
Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
|
|
489
|
-
|
|
626
|
+
Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob,
|
|
627
|
+
as a git LFS blob, or as a XET file. Input `additions` are mutated in-place with the upload mode.
|
|
490
628
|
|
|
491
629
|
Args:
|
|
492
630
|
additions (`Iterable` of :class:`CommitOperationAdd`):
|
huggingface_hub/_space_api.py
CHANGED
|
@@ -54,21 +54,34 @@ class SpaceHardware(str, Enum):
|
|
|
54
54
|
assert SpaceHardware.CPU_BASIC == "cpu-basic"
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
-
Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/
|
|
57
|
+
Taken from https://github.com/huggingface-internal/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts (private url).
|
|
58
58
|
"""
|
|
59
59
|
|
|
60
|
+
# CPU
|
|
60
61
|
CPU_BASIC = "cpu-basic"
|
|
61
62
|
CPU_UPGRADE = "cpu-upgrade"
|
|
63
|
+
CPU_XL = "cpu-xl"
|
|
64
|
+
|
|
65
|
+
# ZeroGPU
|
|
66
|
+
ZERO_A10G = "zero-a10g"
|
|
67
|
+
|
|
68
|
+
# GPU
|
|
62
69
|
T4_SMALL = "t4-small"
|
|
63
70
|
T4_MEDIUM = "t4-medium"
|
|
64
71
|
L4X1 = "l4x1"
|
|
65
72
|
L4X4 = "l4x4"
|
|
66
|
-
|
|
73
|
+
L40SX1 = "l40sx1"
|
|
74
|
+
L40SX4 = "l40sx4"
|
|
75
|
+
L40SX8 = "l40sx8"
|
|
67
76
|
A10G_SMALL = "a10g-small"
|
|
68
77
|
A10G_LARGE = "a10g-large"
|
|
69
78
|
A10G_LARGEX2 = "a10g-largex2"
|
|
70
79
|
A10G_LARGEX4 = "a10g-largex4"
|
|
71
80
|
A100_LARGE = "a100-large"
|
|
81
|
+
H100 = "h100"
|
|
82
|
+
H100X8 = "h100x8"
|
|
83
|
+
|
|
84
|
+
# TPU
|
|
72
85
|
V5E_1X1 = "v5e-1x1"
|
|
73
86
|
V5E_2X2 = "v5e-2x2"
|
|
74
87
|
V5E_2X4 = "v5e-2x4"
|
huggingface_hub/_webhooks_server.py
CHANGED
|
@@ -186,6 +186,8 @@ class WebhooksServer:
|
|
|
186
186
|
# Print instructions and block main thread
|
|
187
187
|
space_host = os.environ.get("SPACE_HOST")
|
|
188
188
|
url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
|
|
189
|
+
if url is None:
|
|
190
|
+
raise ValueError("Cannot find the URL of the app. Please provide a valid `ui` or update `gradio` version.")
|
|
189
191
|
url = url.strip("/")
|
|
190
192
|
message = "\nWebhooks are correctly setup and ready to use:"
|
|
191
193
|
message += "\n" + "\n".join(f" - POST {url}{webhook}" for webhook in self.registered_webhooks)
|
huggingface_hub/commands/delete_cache.py
CHANGED
|
@@ -18,6 +18,7 @@ Usage:
|
|
|
18
18
|
huggingface-cli delete-cache
|
|
19
19
|
huggingface-cli delete-cache --disable-tui
|
|
20
20
|
huggingface-cli delete-cache --dir ~/.cache/huggingface/hub
|
|
21
|
+
huggingface-cli delete-cache --sort=size
|
|
21
22
|
|
|
22
23
|
NOTE:
|
|
23
24
|
This command is based on `InquirerPy` to build the multiselect menu in the terminal.
|
|
@@ -50,7 +51,6 @@ NOTE:
|
|
|
50
51
|
TODO: add support for `huggingface-cli delete-cache aaaaaa bbbbbb cccccc (...)` ?
|
|
51
52
|
TODO: add "--keep-last" arg to delete revisions that are not on `main` ref
|
|
52
53
|
TODO: add "--filter" arg to filter repositories by name ?
|
|
53
|
-
TODO: add "--sort" arg to sort by size ?
|
|
54
54
|
TODO: add "--limit" arg to limit to X repos ?
|
|
55
55
|
TODO: add "-y" arg for immediate deletion ?
|
|
56
56
|
See discussions in https://github.com/huggingface/huggingface_hub/issues/1025.
|
|
@@ -60,7 +60,7 @@ import os
|
|
|
60
60
|
from argparse import Namespace, _SubParsersAction
|
|
61
61
|
from functools import wraps
|
|
62
62
|
from tempfile import mkstemp
|
|
63
|
-
from typing import Any, Callable, Iterable, List, Optional, Union
|
|
63
|
+
from typing import Any, Callable, Iterable, List, Literal, Optional, Union
|
|
64
64
|
|
|
65
65
|
from ..utils import CachedRepoInfo, CachedRevisionInfo, HFCacheInfo, scan_cache_dir
|
|
66
66
|
from . import BaseHuggingfaceCLICommand
|
|
@@ -76,6 +76,8 @@ try:
|
|
|
76
76
|
except ImportError:
|
|
77
77
|
_inquirer_py_available = False
|
|
78
78
|
|
|
79
|
+
SortingOption_T = Literal["alphabetical", "lastUpdated", "lastUsed", "size"]
|
|
80
|
+
|
|
79
81
|
|
|
80
82
|
def require_inquirer_py(fn: Callable) -> Callable:
|
|
81
83
|
"""Decorator to flag methods that require `InquirerPy`."""
|
|
@@ -120,11 +122,25 @@ class DeleteCacheCommand(BaseHuggingfaceCLICommand):
|
|
|
120
122
|
),
|
|
121
123
|
)
|
|
122
124
|
|
|
125
|
+
delete_cache_parser.add_argument(
|
|
126
|
+
"--sort",
|
|
127
|
+
nargs="?",
|
|
128
|
+
choices=["alphabetical", "lastUpdated", "lastUsed", "size"],
|
|
129
|
+
help=(
|
|
130
|
+
"Sort repositories by the specified criteria. Options: "
|
|
131
|
+
"'alphabetical' (A-Z), "
|
|
132
|
+
"'lastUpdated' (newest first), "
|
|
133
|
+
"'lastUsed' (most recent first), "
|
|
134
|
+
"'size' (largest first)."
|
|
135
|
+
),
|
|
136
|
+
)
|
|
137
|
+
|
|
123
138
|
delete_cache_parser.set_defaults(func=DeleteCacheCommand)
|
|
124
139
|
|
|
125
140
|
def __init__(self, args: Namespace) -> None:
|
|
126
141
|
self.cache_dir: Optional[str] = args.dir
|
|
127
142
|
self.disable_tui: bool = args.disable_tui
|
|
143
|
+
self.sort_by: Optional[SortingOption_T] = args.sort
|
|
128
144
|
|
|
129
145
|
def run(self):
|
|
130
146
|
"""Run `delete-cache` command with or without TUI."""
|
|
@@ -133,9 +149,9 @@ class DeleteCacheCommand(BaseHuggingfaceCLICommand):
|
|
|
133
149
|
|
|
134
150
|
# Manual review from the user
|
|
135
151
|
if self.disable_tui:
|
|
136
|
-
selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[])
|
|
152
|
+
selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[], sort_by=self.sort_by)
|
|
137
153
|
else:
|
|
138
|
-
selected_hashes = _manual_review_tui(hf_cache_info, preselected=[])
|
|
154
|
+
selected_hashes = _manual_review_tui(hf_cache_info, preselected=[], sort_by=self.sort_by)
|
|
139
155
|
|
|
140
156
|
# If deletion is not cancelled
|
|
141
157
|
if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes:
|
|
@@ -163,14 +179,35 @@ class DeleteCacheCommand(BaseHuggingfaceCLICommand):
|
|
|
163
179
|
print("Deletion is cancelled. Do nothing.")
|
|
164
180
|
|
|
165
181
|
|
|
182
|
+
def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[SortingOption_T] = None):
|
|
183
|
+
if sort_by == "alphabetical":
|
|
184
|
+
return (repo.repo_type, repo.repo_id.lower()) # by type then name
|
|
185
|
+
elif sort_by == "lastUpdated":
|
|
186
|
+
return -max(rev.last_modified for rev in repo.revisions) # newest first
|
|
187
|
+
elif sort_by == "lastUsed":
|
|
188
|
+
return -repo.last_accessed # most recently used first
|
|
189
|
+
elif sort_by == "size":
|
|
190
|
+
return -repo.size_on_disk # largest first
|
|
191
|
+
else:
|
|
192
|
+
return (repo.repo_type, repo.repo_id) # default stable order
|
|
193
|
+
|
|
194
|
+
|
|
166
195
|
@require_inquirer_py
|
|
167
|
-
def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
|
|
196
|
+
def _manual_review_tui(
|
|
197
|
+
hf_cache_info: HFCacheInfo,
|
|
198
|
+
preselected: List[str],
|
|
199
|
+
sort_by: Optional[SortingOption_T] = None,
|
|
200
|
+
) -> List[str]:
|
|
168
201
|
"""Ask the user for a manual review of the revisions to delete.
|
|
169
202
|
|
|
170
203
|
Displays a multi-select menu in the terminal (TUI).
|
|
171
204
|
"""
|
|
172
205
|
# Define multiselect list
|
|
173
|
-
choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected)
|
|
206
|
+
choices = _get_tui_choices_from_scan(
|
|
207
|
+
repos=hf_cache_info.repos,
|
|
208
|
+
preselected=preselected,
|
|
209
|
+
sort_by=sort_by,
|
|
210
|
+
)
|
|
174
211
|
checkbox = inquirer.checkbox(
|
|
175
212
|
message="Select revisions to delete:",
|
|
176
213
|
choices=choices, # List of revisions with some pre-selection
|
|
@@ -213,7 +250,11 @@ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool:
|
|
|
213
250
|
return inquirer.confirm(message, default=default).execute()
|
|
214
251
|
|
|
215
252
|
|
|
216
|
-
def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: List[str]) -> List:
|
|
253
|
+
def _get_tui_choices_from_scan(
|
|
254
|
+
repos: Iterable[CachedRepoInfo],
|
|
255
|
+
preselected: List[str],
|
|
256
|
+
sort_by: Optional[SortingOption_T] = None,
|
|
257
|
+
) -> List:
|
|
217
258
|
"""Build a list of choices from the scanned repos.
|
|
218
259
|
|
|
219
260
|
Args:
|
|
@@ -221,14 +262,15 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis
|
|
|
221
262
|
List of scanned repos on which we want to delete revisions.
|
|
222
263
|
preselected (*List[`str`]*):
|
|
223
264
|
List of revision hashes that will be preselected.
|
|
265
|
+
sort_by (*Optional[SortingOption_T]*):
|
|
266
|
+
Sorting direction. Choices: "alphabetical", "lastUpdated", "lastUsed", "size".
|
|
224
267
|
|
|
225
268
|
Return:
|
|
226
269
|
The list of choices to pass to `inquirer.checkbox`.
|
|
227
270
|
"""
|
|
228
271
|
choices: List[Union[Choice, Separator]] = []
|
|
229
272
|
|
|
230
|
-
# First choice is to cancel the deletion
|
|
231
|
-
# no matter the other selected items.
|
|
273
|
+
# First choice is to cancel the deletion
|
|
232
274
|
choices.append(
|
|
233
275
|
Choice(
|
|
234
276
|
_CANCEL_DELETION_STR,
|
|
@@ -237,8 +279,10 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis
|
|
|
237
279
|
)
|
|
238
280
|
)
|
|
239
281
|
|
|
240
|
-
#
|
|
241
|
-
|
|
282
|
+
# Sort repos based on specified criteria
|
|
283
|
+
sorted_repos = sorted(repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
|
|
284
|
+
|
|
285
|
+
for repo in sorted_repos:
|
|
242
286
|
# Repo as separator
|
|
243
287
|
choices.append(
|
|
244
288
|
Separator(
|
|
@@ -264,7 +308,11 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis
|
|
|
264
308
|
return choices
|
|
265
309
|
|
|
266
310
|
|
|
267
|
-
def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
|
|
311
|
+
def _manual_review_no_tui(
|
|
312
|
+
hf_cache_info: HFCacheInfo,
|
|
313
|
+
preselected: List[str],
|
|
314
|
+
sort_by: Optional[SortingOption_T] = None,
|
|
315
|
+
) -> List[str]:
|
|
268
316
|
"""Ask the user for a manual review of the revisions to delete.
|
|
269
317
|
|
|
270
318
|
Used when TUI is disabled. Manual review happens in a separate tmp file that the
|
|
@@ -275,7 +323,10 @@ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) ->
|
|
|
275
323
|
os.close(fd)
|
|
276
324
|
|
|
277
325
|
lines = []
|
|
278
|
-
|
|
326
|
+
|
|
327
|
+
sorted_repos = sorted(hf_cache_info.repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
|
|
328
|
+
|
|
329
|
+
for repo in sorted_repos:
|
|
279
330
|
lines.append(
|
|
280
331
|
f"\n# {repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str},"
|
|
281
332
|
f" used {repo.last_accessed_str})"
|
|
@@ -314,9 +365,9 @@ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) ->
|
|
|
314
365
|
):
|
|
315
366
|
break
|
|
316
367
|
|
|
317
|
-
# 4. Return selected_hashes
|
|
368
|
+
# 4. Return selected_hashes sorted to maintain stable order
|
|
318
369
|
os.remove(tmp_path)
|
|
319
|
-
return selected_hashes
|
|
370
|
+
return sorted(selected_hashes) # Sort to maintain stable order
|
|
320
371
|
|
|
321
372
|
|
|
322
373
|
def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool:
|
|
@@ -418,11 +469,6 @@ _MANUAL_REVIEW_NO_TUI_INSTRUCTIONS = f"""
|
|
|
418
469
|
""".strip()
|
|
419
470
|
|
|
420
471
|
|
|
421
|
-
def _repo_sorting_order(repo: CachedRepoInfo) -> Any:
|
|
422
|
-
# First split by Dataset/Model, then sort by last accessed (oldest first)
|
|
423
|
-
return (repo.repo_type, repo.last_accessed)
|
|
424
|
-
|
|
425
|
-
|
|
426
472
|
def _revision_sorting_order(revision: CachedRevisionInfo) -> Any:
|
|
427
473
|
# Sort by last modified (oldest first)
|
|
428
474
|
return revision.last_modified
|
huggingface_hub/commands/upload.py
CHANGED
|
@@ -30,6 +30,9 @@ Usage:
|
|
|
30
30
|
# Upload filtered directory (example: tensorboard logs except for the last run)
|
|
31
31
|
huggingface-cli upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"
|
|
32
32
|
|
|
33
|
+
# Upload with wildcard
|
|
34
|
+
huggingface-cli upload my-cool-model "./model/training/*.safetensors"
|
|
35
|
+
|
|
33
36
|
# Upload private dataset
|
|
34
37
|
huggingface-cli upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
|
|
35
38
|
|
|
@@ -69,7 +72,9 @@ class UploadCommand(BaseHuggingfaceCLICommand):
|
|
|
69
72
|
"repo_id", type=str, help="The ID of the repo to upload to (e.g. `username/repo-name`)."
|
|
70
73
|
)
|
|
71
74
|
upload_parser.add_argument(
|
|
72
|
-
"local_path",
|
|
75
|
+
"local_path",
|
|
76
|
+
nargs="?",
|
|
77
|
+
help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
|
|
73
78
|
)
|
|
74
79
|
upload_parser.add_argument(
|
|
75
80
|
"path_in_repo",
|
|
@@ -155,7 +160,16 @@ class UploadCommand(BaseHuggingfaceCLICommand):
|
|
|
155
160
|
repo_name: str = args.repo_id.split("/")[-1] # e.g. "Wauplin/my-cool-model" => "my-cool-model"
|
|
156
161
|
self.local_path: str
|
|
157
162
|
self.path_in_repo: str
|
|
158
|
-
|
|
163
|
+
|
|
164
|
+
if args.local_path is not None and any(c in args.local_path for c in ["*", "?", "["]):
|
|
165
|
+
if args.include is not None:
|
|
166
|
+
raise ValueError("Cannot set `--include` when passing a `local_path` containing a wildcard.")
|
|
167
|
+
if args.path_in_repo is not None and args.path_in_repo != ".":
|
|
168
|
+
raise ValueError("Cannot set `path_in_repo` when passing a `local_path` containing a wildcard.")
|
|
169
|
+
self.local_path = "."
|
|
170
|
+
self.include = args.local_path
|
|
171
|
+
self.path_in_repo = "."
|
|
172
|
+
elif args.local_path is None and os.path.isfile(repo_name):
|
|
159
173
|
# Implicit case 1: user provided only a repo_id which happen to be a local file as well => upload it with same name
|
|
160
174
|
self.local_path = repo_name
|
|
161
175
|
self.path_in_repo = repo_name
|
huggingface_hub/constants.py
CHANGED
|
@@ -78,6 +78,7 @@ INFERENCE_ENDPOINT = os.environ.get("HF_INFERENCE_ENDPOINT", "https://api-infere
|
|
|
78
78
|
|
|
79
79
|
# See https://huggingface.co/docs/inference-endpoints/index
|
|
80
80
|
INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"
|
|
81
|
+
INFERENCE_CATALOG_ENDPOINT = "https://endpoints.huggingface.co/api/catalog"
|
|
81
82
|
|
|
82
83
|
# Proxy for third-party providers
|
|
83
84
|
INFERENCE_PROXY_TEMPLATE = "https://router.huggingface.co/{provider}"
|
|
@@ -113,10 +114,12 @@ WEBHOOK_DOMAIN_T = Literal["repo", "discussions"]
|
|
|
113
114
|
|
|
114
115
|
# default cache
|
|
115
116
|
default_home = os.path.join(os.path.expanduser("~"), ".cache")
|
|
116
|
-
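HF_HOME = os.path.expanduser(
|
|
117
|
-
os.getenv(
|
|
118
|
-
"HF_HOME",
|
|
119
|
-
os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"),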
|
|
117
|
+
HF_HOME = os.path.expandvars(
|
|
118
|
+
os.path.expanduser(
|
|
119
|
+
os.getenv(
|
|
120
|
+
"HF_HOME",
|
|
121
|
+
os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"),
|
|
122
|
+
)
|
|
120
123
|
)
|
|
121
124
|
)
|
|
122
125
|
hf_cache_home = HF_HOME # for backward compatibility. TODO: remove this in 1.0.0
|
|
@@ -129,8 +132,22 @@ HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path)
|
|
|
129
132
|
HUGGINGFACE_ASSETS_CACHE = os.getenv("HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path)
|
|
130
133
|
|
|
131
134
|
# New env variables
|
|
132
|
-
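HF_HUB_CACHE = os.getenv("HF_HUB_CACHE", HUGGINGFACE_HUB_CACHE)
|
|
133
|
-
HF_ASSETS_CACHE = os.getenv("HF_ASSETS_CACHE", HUGGINGFACE_ASSETS_CACHE)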
|
|
135
|
+
HF_HUB_CACHE = os.path.expandvars(
|
|
136
|
+
os.path.expanduser(
|
|
137
|
+
os.getenv(
|
|
138
|
+
"HF_HUB_CACHE",
|
|
139
|
+
HUGGINGFACE_HUB_CACHE,
|
|
140
|
+
)
|
|
141
|
+
)
|
|
142
|
+
)
|
|
143
|
+
HF_ASSETS_CACHE = os.path.expandvars(
|
|
144
|
+
os.path.expanduser(
|
|
145
|
+
os.getenv(
|
|
146
|
+
"HF_ASSETS_CACHE",
|
|
147
|
+
HUGGINGFACE_ASSETS_CACHE,
|
|
148
|
+
)
|
|
149
|
+
)
|
|
150
|
+
)
|
|
134
151
|
|
|
135
152
|
HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
|
|
136
153
|
|
|
@@ -145,7 +162,14 @@ HF_HUB_DISABLE_TELEMETRY = (
|
|
|
145
162
|
or _is_true(os.environ.get("DO_NOT_TRACK")) # https://consoledonottrack.com/
|
|
146
163
|
)
|
|
147
164
|
|
|
148
|
-
HF_TOKEN_PATH = os.
|
|
165
|
+
HF_TOKEN_PATH = os.path.expandvars(
|
|
166
|
+
os.path.expanduser(
|
|
167
|
+
os.getenv(
|
|
168
|
+
"HF_TOKEN_PATH",
|
|
169
|
+
os.path.join(HF_HOME, "token"),
|
|
170
|
+
)
|
|
171
|
+
)
|
|
172
|
+
)
|
|
149
173
|
HF_STORED_TOKENS_PATH = os.path.join(os.path.dirname(HF_TOKEN_PATH), "stored_tokens")
|
|
150
174
|
|
|
151
175
|
if _staging_mode:
|
|
@@ -233,3 +257,16 @@ ALL_INFERENCE_API_FRAMEWORKS = MAIN_INFERENCE_API_FRAMEWORKS + [
|
|
|
233
257
|
"stanza",
|
|
234
258
|
"timm",
|
|
235
259
|
]
|
|
260
|
+
|
|
261
|
+
# Xet constants
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
HUGGINGFACE_HEADER_X_XET_ENDPOINT = "X-Xet-Cas-Url"
|
|
265
|
+
HUGGINGFACE_HEADER_X_XET_ACCESS_TOKEN = "X-Xet-Access-Token"
|
|
266
|
+
HUGGINGFACE_HEADER_X_XET_EXPIRATION = "X-Xet-Token-Expiration"
|
|
267
|
+
HUGGINGFACE_HEADER_X_XET_HASH = "X-Xet-Hash"
|
|
268
|
+
HUGGINGFACE_HEADER_X_XET_REFRESH_ROUTE = "X-Xet-Refresh-Route"
|
|
269
|
+
HUGGINGFACE_HEADER_LINK_XET_AUTH_KEY = "xet-auth"
|
|
270
|
+
|
|
271
|
+
default_xet_cache_path = os.path.join(HF_HOME, "xet")
|
|
272
|
+
HF_XET_CACHE = os.getenv("HF_XET_CACHE", default_xet_cache_path)
|