huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- huggingface_hub/__init__.py +33 -45
- huggingface_hub/_commit_api.py +39 -43
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +17 -43
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +135 -50
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +18 -32
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +143 -39
- huggingface_hub/cli/auth.py +105 -171
- huggingface_hub/cli/cache.py +594 -361
- huggingface_hub/cli/download.py +120 -112
- huggingface_hub/cli/hf.py +38 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +282 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -220
- huggingface_hub/cli/upload_large_folder.py +91 -106
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +17 -52
- huggingface_hub/dataclasses.py +135 -21
- huggingface_hub/errors.py +47 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +351 -303
- huggingface_hub/hf_api.py +398 -570
- huggingface_hub/hf_file_system.py +101 -66
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +177 -162
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +218 -258
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +12 -4
- huggingface_hub/inference/_providers/_common.py +62 -24
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +21 -94
- huggingface_hub/repocard.py +15 -16
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +11 -6
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +49 -74
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +371 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +59 -23
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
- huggingface_hub-1.0.0.dist-info/RECORD +152 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -204
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -161
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -208
- huggingface_hub/commands/version.py +0 -40
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -497
- huggingface_hub/repository.py +0 -1471
- huggingface_hub/serialization/_tensorflow.py +0 -92
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
huggingface_hub/cli/cache.py
CHANGED
@@ -12,392 +12,625 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Contains the 'hf cache' command group with
+"""Contains the 'hf cache' command group with cache management subcommands."""
 
-import
+import csv
+import json
+import re
+import sys
 import time
-from
-from
-from
-from typing import Any, Callable,
-
-from ..utils import CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir
-from . import BaseHuggingfaceCLICommand
-from ._cli_utils import ANSI, tabulate
-
-
-# --- DELETE helpers (from delete_cache.py) ---
-try:
-    from InquirerPy import inquirer
-    from InquirerPy.base.control import Choice
-    from InquirerPy.separator import Separator
-
-    _inquirer_py_available = True
-except ImportError:
-    _inquirer_py_available = False
-
-SortingOption_T = Literal["alphabetical", "lastUpdated", "lastUsed", "size"]
-_CANCEL_DELETION_STR = "CANCEL_DELETION"
-
-
-def require_inquirer_py(fn: Callable) -> Callable:
-    @wraps(fn)
-    def _inner(*args, **kwargs):
-        if not _inquirer_py_available:
-            raise ImportError(
-                "The 'cache delete' command requires extra dependencies for the TUI.\n"
-                "Please run 'pip install \"huggingface_hub[cli]\"' to install them.\n"
-                "Otherwise, disable TUI using the '--disable-tui' flag."
-            )
-        return fn(*args, **kwargs)
+from collections import defaultdict
+from dataclasses import dataclass
+from enum import Enum
+from typing import Annotated, Any, Callable, Dict, List, Mapping, Optional, Tuple
 
-
+import typer
 
+from ..utils import (
+    ANSI,
+    CachedRepoInfo,
+    CachedRevisionInfo,
+    CacheNotFound,
+    HFCacheInfo,
+    _format_size,
+    scan_cache_dir,
+    tabulate,
+)
+from ..utils._parsing import parse_duration, parse_size
+from ._cli_utils import typer_factory
 
-class CacheCommand(BaseHuggingfaceCLICommand):
-    @staticmethod
-    def register_subcommand(parser: _SubParsersAction):
-        cache_parser = parser.add_parser("cache", help="Manage local cache directory.")
-        cache_subparsers = cache_parser.add_subparsers(dest="cache_command", help="Cache subcommands")
 
-
-        cache_parser.set_defaults(func=lambda args: cache_parser.print_help())
+cache_cli = typer_factory(help="Manage local cache directory.")
 
-[... 59 deleted lines not rendered in the source view ...]
-        print("
-[... 28 deleted lines not rendered in the source view ...]
+
+#### Cache helper utilities
+
+
+class OutputFormat(str, Enum):
+    table = "table"
+    json = "json"
+    csv = "csv"
+
+
+@dataclass(frozen=True)
+class _DeletionResolution:
+    revisions: frozenset[str]
+    selected: dict[CachedRepoInfo, frozenset[CachedRevisionInfo]]
+    missing: tuple[str, ...]
+
+
+_FILTER_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)\s*(?P<op>==|!=|>=|<=|>|<|=)\s*(?P<value>.+)$")
+_ALLOWED_OPERATORS = {"=", "!=", ">", "<", ">=", "<="}
+_FILTER_KEYS = {"accessed", "modified", "refs", "size", "type"}
+
+
+@dataclass(frozen=True)
+class CacheDeletionCounts:
+    """Simple counters summarizing cache deletions for CLI messaging."""
+
+    repo_count: int
+    partial_revision_count: int
+    total_revision_count: int
+
+
+CacheEntry = Tuple[CachedRepoInfo, Optional[CachedRevisionInfo]]
+RepoRefsMap = Dict[CachedRepoInfo, frozenset[str]]
+
+
+def summarize_deletions(
+    selected_by_repo: Mapping[CachedRepoInfo, frozenset[CachedRevisionInfo]],
+) -> CacheDeletionCounts:
+    """Summarize deletions across repositories."""
+    repo_count = 0
+    total_revisions = 0
+    revisions_in_full_repos = 0
+
+    for repo, revisions in selected_by_repo.items():
+        total_revisions += len(revisions)
+        if len(revisions) == len(repo.revisions):
+            repo_count += 1
+            revisions_in_full_repos += len(revisions)
+
+    partial_revision_count = total_revisions - revisions_in_full_repos
+    return CacheDeletionCounts(repo_count, partial_revision_count, total_revisions)
+
+
+def print_cache_selected_revisions(selected_by_repo: Mapping[CachedRepoInfo, frozenset[CachedRevisionInfo]]) -> None:
+    """Pretty-print selected cache revisions during confirmation prompts."""
+    for repo in sorted(selected_by_repo.keys(), key=lambda repo: (repo.repo_type, repo.repo_id.lower())):
+        repo_key = f"{repo.repo_type}/{repo.repo_id}"
+        revisions = sorted(selected_by_repo[repo], key=lambda rev: rev.commit_hash)
+        if len(revisions) == len(repo.revisions):
+            print(f" - {repo_key} (entire repo)")
+            continue
+
+        print(f" - {repo_key}:")
+        for revision in revisions:
+            refs = " ".join(sorted(revision.refs)) or "(detached)"
+            print(f" {revision.commit_hash} [{refs}] {revision.size_on_disk_str}")
+
+
+def build_cache_index(
+    hf_cache_info: HFCacheInfo,
+) -> Tuple[
+    Dict[str, CachedRepoInfo],
+    Dict[str, Tuple[CachedRepoInfo, CachedRevisionInfo]],
+]:
+    """Create lookup tables so CLI commands can resolve repo ids and revisions quickly."""
+    repo_lookup: dict[str, CachedRepoInfo] = {}
+    revision_lookup: dict[str, tuple[CachedRepoInfo, CachedRevisionInfo]] = {}
+    for repo in hf_cache_info.repos:
+        repo_key = repo.cache_id.lower()
+        repo_lookup[repo_key] = repo
+        for revision in repo.revisions:
+            revision_lookup[revision.commit_hash.lower()] = (repo, revision)
+    return repo_lookup, revision_lookup
+
+
+def collect_cache_entries(
+    hf_cache_info: HFCacheInfo, *, include_revisions: bool
+) -> Tuple[List[CacheEntry], RepoRefsMap]:
+    """Flatten cache metadata into rows consumed by `hf cache ls`."""
+    entries: List[CacheEntry] = []
+    repo_refs_map: RepoRefsMap = {}
+    sorted_repos = sorted(hf_cache_info.repos, key=lambda repo: (repo.repo_type, repo.repo_id.lower()))
+    for repo in sorted_repos:
+        repo_refs_map[repo] = frozenset({ref for revision in repo.revisions for ref in revision.refs})
+        if include_revisions:
+            for revision in sorted(repo.revisions, key=lambda rev: rev.commit_hash):
+                entries.append((repo, revision))
         else:
-[... 7 deleted lines not rendered in the source view ...]
-        if confirmed:
-            strategy = hf_cache_info.delete_revisions(*selected_hashes)
-            print("Start deletion.")
-            strategy.execute()
-            print(
-                f"Done. Deleted {len(strategy.repos)} repo(s) and"
-                f" {len(strategy.snapshots)} revision(s) for a total of"
-                f" {strategy.expected_freed_size_str}."
-            )
-            return
-        print("Deletion is cancelled. Do nothing.")
-
-
-def get_table(hf_cache_info: HFCacheInfo, *, verbosity: int = 0) -> str:
-    if verbosity == 0:
-        return tabulate(
-            rows=[
-                [
-                    repo.repo_id,
-                    repo.repo_type,
-                    "{:>12}".format(repo.size_on_disk_str),
-                    repo.nb_files,
-                    repo.last_accessed_str,
-                    repo.last_modified_str,
-                    ", ".join(sorted(repo.refs)),
-                    str(repo.repo_path),
-                ]
-                for repo in sorted(hf_cache_info.repos, key=lambda repo: repo.repo_path)
-            ],
-            headers=[
-                "REPO ID",
-                "REPO TYPE",
-                "SIZE ON DISK",
-                "NB FILES",
-                "LAST_ACCESSED",
-                "LAST_MODIFIED",
-                "REFS",
-                "LOCAL PATH",
-            ],
+            entries.append((repo, None))
+    if include_revisions:
+        entries.sort(
+            key=lambda entry: (
+                entry[0].cache_id,
+                entry[1].commit_hash if entry[1] is not None else "",
+            )
         )
     else:
-[... 25 deleted lines not rendered in the source view ...]
+        entries.sort(key=lambda entry: entry[0].cache_id)
+    return entries, repo_refs_map
+
+
+def compile_cache_filter(
+    expr: str, repo_refs_map: RepoRefsMap
+) -> Callable[[CachedRepoInfo, Optional[CachedRevisionInfo], float], bool]:
+    """Convert a `hf cache ls` filter expression into the yes/no test we apply to each cache entry before displaying it."""
+    match = _FILTER_PATTERN.match(expr.strip())
+    if not match:
+        raise ValueError(f"Invalid filter expression: '{expr}'.")
+
+    key = match.group("key").lower()
+    op = match.group("op")
+    value_raw = match.group("value").strip()
+
+    if op not in _ALLOWED_OPERATORS:
+        raise ValueError(f"Unsupported operator '{op}' in filter '{expr}'. Must be one of {list(_ALLOWED_OPERATORS)}.")
+
+    if key not in _FILTER_KEYS:
+        raise ValueError(f"Unsupported filter key '{key}' in '{expr}'. Must be one of {list(_FILTER_KEYS)}.")
+    # at this point we know that key is in `_FILTER_KEYS`
+    if key == "size":
+        size_threshold = parse_size(value_raw)
+        return lambda repo, revision, _: _compare_numeric(
+            revision.size_on_disk if revision is not None else repo.size_on_disk,
+            op,
+            size_threshold,
         )
 
+    if key in {"modified", "accessed"}:
+        seconds = parse_duration(value_raw.strip())
 
-def
-[... 8 deleted lines not rendered in the source view ...]
+        def _time_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], now: float) -> bool:
+            timestamp = (
+                repo.last_accessed
+                if key == "accessed"
+                else revision.last_modified
+                if revision is not None
+                else repo.last_modified
+            )
+            if timestamp is None:
+                return False
+            return _compare_numeric(now - timestamp, op, seconds)
+
+        return _time_filter
+
+    if key == "type":
+        expected = value_raw.lower()
+
+        if op != "=":
+            raise ValueError(f"Only '=' is supported for 'type' filters. Got '{op}'.")
+
+        def _type_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], _: float) -> bool:
+            return repo.repo_type.lower() == expected
+
+        return _type_filter
+
+    else:  # key == "refs"
+        if op != "=":
+            raise ValueError(f"Only '=' is supported for 'refs' filters. Got {op}.")
+
+        def _refs_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], _: float) -> bool:
+            refs = revision.refs if revision is not None else repo_refs_map.get(repo, frozenset())
+            return value_raw.lower() in [ref.lower() for ref in refs]
+
+        return _refs_filter
+
+
+def _build_cache_export_payload(
+    entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
+) -> List[Dict[str, Any]]:
+    """Normalize cache entries into serializable records for JSON/CSV exports."""
+    payload: List[Dict[str, Any]] = []
+    for repo, revision in entries:
+        if include_revisions:
+            if revision is None:
+                continue
+            record: Dict[str, Any] = {
+                "repo_id": repo.repo_id,
+                "repo_type": repo.repo_type,
+                "revision": revision.commit_hash,
+                "snapshot_path": str(revision.snapshot_path),
+                "size_on_disk": revision.size_on_disk,
+                "last_accessed": repo.last_accessed,
+                "last_modified": revision.last_modified,
+                "refs": sorted(revision.refs),
+            }
+        else:
+            record = {
+                "repo_id": repo.repo_id,
+                "repo_type": repo.repo_type,
+                "size_on_disk": repo.size_on_disk,
+                "last_accessed": repo.last_accessed,
+                "last_modified": repo.last_modified,
+                "refs": sorted(repo_refs_map.get(repo, frozenset())),
+            }
+        payload.append(record)
+    return payload
+
+
+def print_cache_entries_table(
+    entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
+) -> None:
+    """Render cache entries as a table and show a human-readable summary."""
+    if not entries:
+        message = "No cached revisions found." if include_revisions else "No cached repositories found."
+        print(message)
+        return
+    table_rows: List[List[str]]
+    if include_revisions:
+        headers = ["ID", "REVISION", "SIZE", "LAST_MODIFIED", "REFS"]
+        table_rows = [
+            [
+                repo.cache_id,
+                revision.commit_hash,
+                revision.size_on_disk_str.rjust(8),
+                revision.last_modified_str,
+                " ".join(sorted(revision.refs)),
+            ]
+            for repo, revision in entries
+            if revision is not None
+        ]
     else:
-[... 18 deleted lines not rendered in the source view ...]
+        headers = ["ID", "SIZE", "LAST_ACCESSED", "LAST_MODIFIED", "REFS"]
+        table_rows = [
+            [
+                repo.cache_id,
+                repo.size_on_disk_str.rjust(8),
+                repo.last_accessed_str or "",
+                repo.last_modified_str,
+                " ".join(sorted(repo_refs_map.get(repo, frozenset()))),
+            ]
+            for repo, _ in entries
+        ]
+
+    print(tabulate(table_rows, headers=headers))  # type: ignore[arg-type]
+
+    unique_repos = {repo for repo, _ in entries}
+    repo_count = len(unique_repos)
+    if include_revisions:
+        revision_count = sum(1 for _, revision in entries if revision is not None)
+        total_size = sum(revision.size_on_disk for _, revision in entries if revision is not None)
+    else:
+        revision_count = sum(len(repo.revisions) for repo in unique_repos)
+        total_size = sum(repo.size_on_disk for repo in unique_repos)
+
+    summary = f"\nFound {repo_count} repo(s) for a total of {revision_count} revision(s) and {_format_size(total_size)} on disk."
+    print(ANSI.bold(summary))
+
+
+def print_cache_entries_json(
+    entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
+) -> None:
+    """Dump cache entries as JSON for scripting or automation."""
+    payload = _build_cache_export_payload(entries, include_revisions=include_revisions, repo_refs_map=repo_refs_map)
+    json.dump(payload, sys.stdout, indent=2)
+    sys.stdout.write("\n")
+
+
+def print_cache_entries_csv(entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap) -> None:
+    """Export cache entries as CSV rows with the shared payload format."""
+    records = _build_cache_export_payload(entries, include_revisions=include_revisions, repo_refs_map=repo_refs_map)
+    writer = csv.writer(sys.stdout)
+
+    if include_revisions:
+        headers = [
+            "repo_id",
+            "repo_type",
+            "revision",
+            "snapshot_path",
+            "size_on_disk",
+            "last_accessed",
+            "last_modified",
+            "refs",
+        ]
+    else:
+        headers = ["repo_id", "repo_type", "size_on_disk", "last_accessed", "last_modified", "refs"]
+
+    writer.writerow(headers)
+
+    if not records:
+        return
+
+    for record in records:
+        refs = record["refs"]
+        if include_revisions:
+            row = [
+                record.get("repo_id", ""),
+                record.get("repo_type", ""),
+                record.get("revision", ""),
+                record.get("snapshot_path", ""),
+                record.get("size_on_disk"),
+                record.get("last_accessed"),
+                record.get("last_modified"),
+                " ".join(refs) if refs else "",
+            ]
+        else:
+            row = [
+                record.get("repo_id", ""),
+                record.get("repo_type", ""),
+                record.get("size_on_disk"),
+                record.get("last_accessed"),
+                record.get("last_modified"),
+                " ".join(refs) if refs else "",
+            ]
+        writer.writerow(row)
+
+
+def _compare_numeric(left: Optional[float], op: str, right: float) -> bool:
+    """Evaluate numeric comparisons for filters."""
+    if left is None:
+        return False
+
+    comparisons = {
+        "=": left == right,
+        "!=": left != right,
+        ">": left > right,
+        "<": left < right,
+        ">=": left >= right,
+        "<=": left <= right,
+    }
+
+    if op not in comparisons:
+        raise ValueError(f"Unsupported numeric comparison operator: {op}")
+
+    return comparisons[op]
+
+
+def _resolve_deletion_targets(hf_cache_info: HFCacheInfo, targets: list[str]) -> _DeletionResolution:
+    """Resolve the deletion targets into a deletion resolution."""
+    repo_lookup, revision_lookup = build_cache_index(hf_cache_info)
+
+    selected: dict[CachedRepoInfo, set[CachedRevisionInfo]] = defaultdict(set)
+    revisions: set[str] = set()
+    missing: list[str] = []
+
+    for raw_target in targets:
+        target = raw_target.strip()
+        if not target:
+            continue
+        lowered = target.lower()
+
+        if re.fullmatch(r"[0-9a-fA-F]{40}", lowered):
+            match = revision_lookup.get(lowered)
+            if match is None:
+                missing.append(raw_target)
+                continue
+            repo, revision = match
+            selected[repo].add(revision)
+            revisions.add(revision.commit_hash)
+            continue
+
+        matched_repo = repo_lookup.get(lowered)
+        if matched_repo is None:
+            missing.append(raw_target)
+            continue
+
+        for revision in matched_repo.revisions:
+            selected[matched_repo].add(revision)
+            revisions.add(revision.commit_hash)
+
+    frozen_selected = {repo: frozenset(revs) for repo, revs in selected.items()}
+    return _DeletionResolution(
+        revisions=frozenset(revisions),
+        selected=frozen_selected,
+        missing=tuple(missing),
     )
 
-    def _update_expectations(_):
-        checkbox._instruction = _get_expectations_str(
-            hf_cache_info,
-            selected_hashes=[choice["value"] for choice in checkbox.content_control.choices if choice["enabled"]],
-        )
 
-
-    try:
-        return checkbox.execute()
-    except KeyboardInterrupt:
-        return []
+#### Cache CLI commands
 
 
-@
-def
-
+@cache_cli.command()
+def ls(
+    cache_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            help="Cache directory to scan (defaults to Hugging Face cache).",
+        ),
+    ] = None,
+    revisions: Annotated[
+        bool,
+        typer.Option(
+            help="Include revisions in the output instead of aggregated repositories.",
+        ),
+    ] = False,
+    filter: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            "-f",
+            "--filter",
+            help="Filter entries (e.g. 'size>1GB', 'type=model', 'accessed>7d'). Can be used multiple times.",
+        ),
+    ] = None,
+    format: Annotated[
+        OutputFormat,
+        typer.Option(
+            help="Output format.",
+        ),
+    ] = OutputFormat.table,
+    quiet: Annotated[
+        bool,
+        typer.Option(
+            "-q",
+            "--quiet",
+            help="Print only IDs (repo IDs or revision hashes).",
+        ),
+    ] = False,
+) -> None:
+    """List cached repositories or revisions."""
+    try:
+        hf_cache_info = scan_cache_dir(cache_dir)
+    except CacheNotFound as exc:
+        print(f"Cache directory not found: {str(exc.cache_dir)}")
+        raise typer.Exit(code=1)
 
+    filters = filter or []
 
-
-
-    )
-
-[... 5 deleted lines not rendered in the source view ...]
+    entries, repo_refs_map = collect_cache_entries(hf_cache_info, include_revisions=revisions)
+    try:
+        filter_fns = [compile_cache_filter(expr, repo_refs_map) for expr in filters]
+    except ValueError as exc:
+        raise typer.BadParameter(str(exc)) from exc
+
+    now = time.time()
+    for fn in filter_fns:
+        entries = [entry for entry in entries if fn(entry[0], entry[1], now)]
+
+    if quiet:
+        for repo, revision in entries:
+            print(revision.commit_hash if revision is not None else repo.cache_id)
+        return
+
+    formatters = {
+        OutputFormat.table: print_cache_entries_table,
+        OutputFormat.json: print_cache_entries_json,
+        OutputFormat.csv: print_cache_entries_csv,
+    }
+    return formatters[format](entries, include_revisions=revisions, repo_refs_map=repo_refs_map)
+
+
+@cache_cli.command()
+def rm(
+    targets: Annotated[
+        list[str],
+        typer.Argument(
+            help="One or more repo IDs (e.g. model/bert-base-uncased) or revision hashes to delete.",
+        ),
+    ],
+    cache_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            help="Cache directory to scan (defaults to Hugging Face cache).",
+        ),
+    ] = None,
+    yes: Annotated[
+        bool,
+        typer.Option(
+            "-y",
+            "--yes",
+            help="Skip confirmation prompt.",
+        ),
+    ] = False,
+    dry_run: Annotated[
+        bool,
+        typer.Option(
+            help="Preview deletions without removing anything.",
+        ),
+    ] = False,
+) -> None:
+    """Remove cached repositories or revisions."""
+    try:
+        hf_cache_info = scan_cache_dir(cache_dir)
+    except CacheNotFound as exc:
+        print(f"Cache directory not found: {str(exc.cache_dir)}")
+        raise typer.Exit(code=1)
+
+    resolution = _resolve_deletion_targets(hf_cache_info, targets)
+
+    if resolution.missing:
+        print("Could not find the following targets in the cache:")
+        for entry in resolution.missing:
+            print(f" - {entry}")
+
+    if len(resolution.revisions) == 0:
+        print("Nothing to delete.")
+        raise typer.Exit(code=0)
+
+    strategy = hf_cache_info.delete_revisions(*sorted(resolution.revisions))
+    counts = summarize_deletions(resolution.selected)
+
+    summary_parts: list[str] = []
+    if counts.repo_count:
+        summary_parts.append(f"{counts.repo_count} repo(s)")
+    if counts.partial_revision_count:
+        summary_parts.append(f"{counts.partial_revision_count} revision(s)")
+    if not summary_parts:
+        summary_parts.append(f"{counts.total_revision_count} revision(s)")
+
+    summary_text = " and ".join(summary_parts)
+    print(f"About to delete {summary_text} totalling {strategy.expected_freed_size_str}.")
+    print_cache_selected_revisions(resolution.selected)
+
+    if dry_run:
+        print("Dry run: no files were deleted.")
+        return
+
+    if not yes and not typer.confirm("Proceed with deletion?", default=False):
+        print("Deletion cancelled.")
+        return
+
+    strategy.execute()
+    counts = summarize_deletions(resolution.selected)
+    print(
+        f"Deleted {counts.repo_count} repo(s) and {counts.total_revision_count} revision(s); freed {strategy.expected_freed_size_str}."
     )
-    sorted_repos = sorted(repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
-    for repo in sorted_repos:
-        choices.append(
-            Separator(
-                f"\n{repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str}, used {repo.last_accessed_str})"
-            )
-        )
-        for revision in sorted(repo.revisions, key=_revision_sorting_order):
-            choices.append(
-                Choice(
-                    revision.commit_hash,
-                    name=(
-                        f"{revision.commit_hash[:8]}: {', '.join(sorted(revision.refs)) or '(detached)'} # modified {revision.last_modified_str}"
-                    ),
-                    enabled=revision.commit_hash in preselected,
-                )
-            )
-    return choices
 
 
-[... 50 deleted lines not rendered in the source view ...]
-            print(f"Invalid input. Must be one of {ALL}")
-
-
-def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: List[str]) -> str:
-    if _CANCEL_DELETION_STR in selected_hashes:
-        return "Nothing will be deleted."
-    strategy = hf_cache_info.delete_revisions(*selected_hashes)
-    return f"{len(selected_hashes)} revisions selected counting for {strategy.expected_freed_size_str}."
-
-
-def _read_manual_review_tmp_file(tmp_path: str) -> List[str]:
-    with open(tmp_path) as f:
-        content = f.read()
-    lines = [line.strip() for line in content.split("\n")]
-    selected_lines = [line for line in lines if not line.startswith("#")]
-    selected_hashes = [line.split("#")[0].strip() for line in selected_lines]
-    return [hash for hash in selected_hashes if len(hash) > 0]
-
-
-_MANUAL_REVIEW_NO_TUI_INSTRUCTIONS = f"""
-# INSTRUCTIONS
-# ------------
-# This is a temporary file created by running `hf cache delete --disable-tui`. It contains a set of revisions that can be deleted from your local cache directory.
-#
-# Please manually review the revisions you want to delete:
-# - Revision hashes can be commented out with '#'.
-# - Only non-commented revisions in this file will be deleted.
-# - Revision hashes that are removed from this file are ignored as well.
-# - If `{_CANCEL_DELETION_STR}` line is uncommented, the all cache deletion is cancelled and no changes will be applied.
-#
-# Once you've manually reviewed this file, please confirm deletion in the terminal. This file will be automatically removed once done.
-# ------------
+@cache_cli.command()
+def prune(
+    cache_dir: Annotated[
+        Optional[str],
+        typer.Option(
+            help="Cache directory to scan (defaults to Hugging Face cache).",
+        ),
+    ] = None,
+    yes: Annotated[
+        bool,
+        typer.Option(
+            "-y",
+            "--yes",
+            help="Skip confirmation prompt.",
+        ),
+    ] = False,
+    dry_run: Annotated[
+        bool,
+        typer.Option(
+            help="Preview deletions without removing anything.",
+        ),
+    ] = False,
+) -> None:
+    """Remove detached revisions from the cache."""
+    try:
+        hf_cache_info = scan_cache_dir(cache_dir)
+    except CacheNotFound as exc:
+        print(f"Cache directory not found: {str(exc.cache_dir)}")
+        raise typer.Exit(code=1)
+
+    selected: dict[CachedRepoInfo, frozenset[CachedRevisionInfo]] = {}
+    revisions: set[str] = set()
+    for repo in hf_cache_info.repos:
+        detached = frozenset(revision for revision in repo.revisions if len(revision.refs) == 0)
+        if not detached:
+            continue
+        selected[repo] = detached
+        revisions.update(revision.commit_hash for revision in detached)
+
+    if len(revisions) == 0:
+        print("No unreferenced revisions found. Nothing to prune.")
+        return
+
+    resolution = _DeletionResolution(
+        revisions=frozenset(revisions),
+        selected=selected,
+        missing=(),
+    )
+    strategy = hf_cache_info.delete_revisions(*sorted(resolution.revisions))
+    counts = summarize_deletions(selected)
 
-[... 4 deleted lines not rendered in the source view ...]
-# ------------
+    print(
+        f"About to delete {counts.total_revision_count} unreferenced revision(s) ({strategy.expected_freed_size_str} total)."
+    )
+    print_cache_selected_revisions(selected)
 
-[... 3 deleted lines not rendered in the source view ...]
+    if dry_run:
+        print("Dry run: no files were deleted.")
+        return
 
+    if not yes and not typer.confirm("Proceed?"):
+        print("Pruning cancelled.")
+        return
 
-
-
+    strategy.execute()
+    print(f"Deleted {counts.total_revision_count} unreferenced revision(s); freed {strategy.expected_freed_size_str}.")
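
As a quick illustration of the filter grammar added in this diff, the sketch below reuses the `_FILTER_PATTERN` regex from the new cli/cache.py to split an `hf cache ls --filter` expression into key, operator, and value. The `parse_size` helper here is a hypothetical, simplified stand-in for `huggingface_hub.utils._parsing.parse_size`, whose exact behavior (including which units it accepts) is not shown in this diff; decimal units are assumed.

import re

# Same key/operator/value pattern as the _FILTER_PATTERN added in cli/cache.py.
FILTER_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)\s*(?P<op>==|!=|>=|<=|>|<|=)\s*(?P<value>.+)$")

# Hypothetical stand-in for parse_size, assuming decimal units.
UNITS = {"B": 1, "KB": 10**3, "MB": 10**6, "GB": 10**9, "TB": 10**12}

def parse_size(value: str) -> float:
    # Accepts e.g. "1GB", "1.5 MB", or a bare number of bytes.
    m = re.fullmatch(r"(?P<num>\d+(?:\.\d+)?)\s*(?P<unit>[A-Za-z]+)?", value.strip())
    if m is None:
        raise ValueError(f"Invalid size: {value!r}")
    unit = (m.group("unit") or "B").upper()
    return float(m.group("num")) * UNITS[unit]

for expr in ("size>1GB", "type=model", "accessed >= 7d"):
    match = FILTER_PATTERN.match(expr)
    print(expr, "->", match.groupdict() if match else "invalid")
print(parse_size("1.5GB"))  # 1500000000.0

Per the `ls` implementation above, repeated `--filter` options are applied one after another to the entry list, so multiple filters combine as a logical AND.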