huggingface-hub 1.0.0rc6__py3-none-any.whl → 1.0.0rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (36) hide show
  1. huggingface_hub/__init__.py +7 -1
  2. huggingface_hub/_commit_api.py +1 -5
  3. huggingface_hub/_jobs_api.py +1 -1
  4. huggingface_hub/_login.py +3 -3
  5. huggingface_hub/_snapshot_download.py +4 -3
  6. huggingface_hub/_upload_large_folder.py +2 -15
  7. huggingface_hub/_webhooks_server.py +1 -1
  8. huggingface_hub/cli/_cli_utils.py +1 -1
  9. huggingface_hub/cli/auth.py +0 -20
  10. huggingface_hub/cli/cache.py +561 -304
  11. huggingface_hub/cli/download.py +2 -2
  12. huggingface_hub/cli/repo.py +0 -7
  13. huggingface_hub/cli/upload.py +0 -8
  14. huggingface_hub/community.py +16 -8
  15. huggingface_hub/constants.py +10 -11
  16. huggingface_hub/file_download.py +9 -61
  17. huggingface_hub/hf_api.py +170 -126
  18. huggingface_hub/hf_file_system.py +31 -6
  19. huggingface_hub/inference/_client.py +1 -1
  20. huggingface_hub/inference/_generated/_async_client.py +1 -1
  21. huggingface_hub/inference/_providers/__init__.py +15 -2
  22. huggingface_hub/inference/_providers/_common.py +39 -0
  23. huggingface_hub/inference/_providers/clarifai.py +13 -0
  24. huggingface_hub/lfs.py +3 -65
  25. huggingface_hub/serialization/_torch.py +1 -1
  26. huggingface_hub/utils/__init__.py +0 -2
  27. huggingface_hub/utils/_cache_manager.py +17 -42
  28. huggingface_hub/utils/_http.py +25 -3
  29. huggingface_hub/utils/_parsing.py +98 -0
  30. huggingface_hub/utils/_runtime.py +1 -14
  31. {huggingface_hub-1.0.0rc6.dist-info → huggingface_hub-1.0.0rc7.dist-info}/METADATA +4 -14
  32. {huggingface_hub-1.0.0rc6.dist-info → huggingface_hub-1.0.0rc7.dist-info}/RECORD +36 -34
  33. {huggingface_hub-1.0.0rc6.dist-info → huggingface_hub-1.0.0rc7.dist-info}/LICENSE +0 -0
  34. {huggingface_hub-1.0.0rc6.dist-info → huggingface_hub-1.0.0rc7.dist-info}/WHEEL +0 -0
  35. {huggingface_hub-1.0.0rc6.dist-info → huggingface_hub-1.0.0rc7.dist-info}/entry_points.txt +0 -0
  36. {huggingface_hub-1.0.0rc6.dist-info → huggingface_hub-1.0.0rc7.dist-info}/top_level.txt +0 -0
@@ -12,368 +12,625 @@
12
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
- """Contains the 'hf cache' command group with 'scan' and 'delete' subcommands."""
15
+ """Contains the 'hf cache' command group with cache management subcommands."""
16
16
 
17
- import os
17
+ import csv
18
+ import json
19
+ import re
20
+ import sys
18
21
  import time
22
+ from collections import defaultdict
23
+ from dataclasses import dataclass
19
24
  from enum import Enum
20
- from functools import wraps
21
- from tempfile import mkstemp
22
- from typing import Annotated, Any, Callable, Iterable, Optional, Union
25
+ from typing import Annotated, Any, Callable, Dict, List, Mapping, Optional, Tuple
23
26
 
24
27
  import typer
25
28
 
26
- from ..utils import ANSI, CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir, tabulate
29
+ from ..utils import (
30
+ ANSI,
31
+ CachedRepoInfo,
32
+ CachedRevisionInfo,
33
+ CacheNotFound,
34
+ HFCacheInfo,
35
+ _format_size,
36
+ scan_cache_dir,
37
+ tabulate,
38
+ )
39
+ from ..utils._parsing import parse_duration, parse_size
27
40
  from ._cli_utils import typer_factory
28
41
 
29
42
 
30
- # --- DELETE helpers (from delete_cache.py) ---
31
- try:
32
- from InquirerPy import inquirer
33
- from InquirerPy.base.control import Choice
34
- from InquirerPy.separator import Separator
43
# Root Typer command group for the `hf cache` subcommands (ls / rm / prune).
cache_cli = typer_factory(help="Manage local cache directory.")


#### Cache helper utilities


class OutputFormat(str, Enum):
    """Output formats accepted by the `--format` option of `hf cache ls`."""

    table = "table"  # human-readable table (default)
    json = "json"  # machine-readable JSON on stdout
    csv = "csv"  # CSV rows on stdout
53
+
54
+
55
@dataclass(frozen=True)
class _DeletionResolution:
    """Immutable result of resolving CLI deletion targets against the scanned cache."""

    # Commit hashes scheduled for deletion.
    revisions: frozenset[str]
    # For each affected repo, the subset of its revisions selected for deletion.
    selected: dict[CachedRepoInfo, frozenset[CachedRevisionInfo]]
    # Raw target strings that matched nothing in the cache (reported to the user).
    missing: tuple[str, ...]
60
+
61
+
62
# Grammar of a single `--filter` expression: "<key> <op> <value>" (whitespace around the operator optional).
_FILTER_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)\s*(?P<op>==|!=|>=|<=|>|<|=)\s*(?P<value>.+)$")
# Operators accepted by `compile_cache_filter`. NOTE: the regex above also matches '==',
# which is deliberately absent here so that 'key==value' yields an explanatory error.
_ALLOWED_OPERATORS = {"=", "!=", ">", "<", ">=", "<="}
# Filter keys understood by `compile_cache_filter`.
_FILTER_KEYS = {"accessed", "modified", "refs", "size", "type"}
65
+
66
+
67
@dataclass(frozen=True)
class CacheDeletionCounts:
    """Simple counters summarizing cache deletions for CLI messaging."""

    # Repos deleted in full (every cached revision selected).
    repo_count: int
    # Revisions deleted from repos that are only partially removed.
    partial_revision_count: int
    # Total number of revisions selected for deletion.
    total_revision_count: int
74
+
75
+
76
# One row shown by `hf cache ls`: a repo plus an optional revision (None in aggregated mode).
CacheEntry = Tuple[CachedRepoInfo, Optional[CachedRevisionInfo]]
# For each repo, every ref (branch/tag name) attached to any of its cached revisions.
RepoRefsMap = Dict[CachedRepoInfo, frozenset[str]]
78
+
35
79
 
36
- _inquirer_py_available = True
37
- except ImportError:
38
- _inquirer_py_available = False
80
def summarize_deletions(
    selected_by_repo: Mapping[CachedRepoInfo, frozenset[CachedRevisionInfo]],
) -> CacheDeletionCounts:
    """Summarize deletions across repositories.

    A repo counts as fully deleted when every one of its cached revisions is
    selected; revisions from partially-deleted repos are reported separately.
    """
    total = sum(len(revs) for revs in selected_by_repo.values())
    # Repos whose entire revision set is selected are deleted wholesale.
    full_repos = [repo for repo, revs in selected_by_repo.items() if len(revs) == len(repo.revisions)]
    revisions_in_full_repos = sum(len(selected_by_repo[repo]) for repo in full_repos)
    return CacheDeletionCounts(len(full_repos), total - revisions_in_full_repos, total)
42
96
 
43
- class SortingOption(str, Enum):
44
- alphabetical = "alphabetical"
45
- lastUpdated = "lastUpdated"
46
- lastUsed = "lastUsed"
47
- size = "size"
48
97
 
98
def print_cache_selected_revisions(selected_by_repo: "Mapping[CachedRepoInfo, frozenset[CachedRevisionInfo]]") -> None:
    """Pretty-print selected cache revisions during confirmation prompts."""
    # Stable ordering: by repo type, then case-insensitive repo id.
    ordered_repos = sorted(selected_by_repo, key=lambda r: (r.repo_type, r.repo_id.lower()))
    for repo in ordered_repos:
        label = f"{repo.repo_type}/{repo.repo_id}"
        picked = sorted(selected_by_repo[repo], key=lambda rev: rev.commit_hash)
        if len(picked) == len(repo.revisions):
            # Every revision is selected -> report the repo as a whole.
            print(f" - {label} (entire repo)")
        else:
            print(f" - {label}:")
            for rev in picked:
                ref_list = " ".join(sorted(rev.refs)) or "(detached)"
                print(f" {rev.commit_hash} [{ref_list}] {rev.size_on_disk_str}")
111
+
112
+
113
def build_cache_index(
    hf_cache_info: "HFCacheInfo",
) -> "Tuple[Dict[str, CachedRepoInfo], Dict[str, Tuple[CachedRepoInfo, CachedRevisionInfo]]]":
    """Create lookup tables so CLI commands can resolve repo ids and revisions quickly.

    Keys are lower-cased for case-insensitive lookups: repo cache ids map to the
    repo entry; commit hashes map to (repo, revision) pairs.
    """
    by_cache_id = {repo.cache_id.lower(): repo for repo in hf_cache_info.repos}
    by_commit_hash = {
        revision.commit_hash.lower(): (repo, revision)
        for repo in hf_cache_info.repos
        for revision in repo.revisions
    }
    return by_cache_id, by_commit_hash
128
+
129
+
130
def collect_cache_entries(
    hf_cache_info: "HFCacheInfo", *, include_revisions: bool
) -> "Tuple[List[CacheEntry], RepoRefsMap]":
    """Flatten cache metadata into rows consumed by `hf cache ls`.

    Returns the (repo, revision) rows — revision is None in aggregated mode —
    together with a map of every ref attached to each repo.
    """
    rows: list = []
    refs_by_repo: dict = {}
    for repo in sorted(hf_cache_info.repos, key=lambda r: (r.repo_type, r.repo_id.lower())):
        refs_by_repo[repo] = frozenset(ref for revision in repo.revisions for ref in revision.refs)
        if include_revisions:
            rows.extend(
                (repo, revision) for revision in sorted(repo.revisions, key=lambda rev: rev.commit_hash)
            )
        else:
            rows.append((repo, None))
    # Final display order: by cache id, then commit hash when revisions are listed.
    if include_revisions:
        rows.sort(key=lambda row: (row[0].cache_id, "" if row[1] is None else row[1].commit_hash))
    else:
        rows.sort(key=lambda row: row[0].cache_id)
    return rows, refs_by_repo
154
+
155
+
156
def compile_cache_filter(
    expr: str, repo_refs_map: RepoRefsMap
) -> Callable[[CachedRepoInfo, Optional[CachedRevisionInfo], float], bool]:
    """Convert a `hf cache ls` filter expression into the yes/no test we apply to each cache entry before displaying it."""
    # Parse "<key> <op> <value>"; the regex accepts '==' but the allowed-set check
    # below rejects it with a helpful message.
    match = _FILTER_PATTERN.match(expr.strip())
    if not match:
        raise ValueError(f"Invalid filter expression: '{expr}'.")

    key = match.group("key").lower()
    op = match.group("op")
    value_raw = match.group("value").strip()

    if op not in _ALLOWED_OPERATORS:
        raise ValueError(f"Unsupported operator '{op}' in filter '{expr}'. Must be one of {list(_ALLOWED_OPERATORS)}.")

    if key not in _FILTER_KEYS:
        raise ValueError(f"Unsupported filter key '{key}' in '{expr}'. Must be one of {list(_FILTER_KEYS)}.")
    # at this point we know that key is in `_FILTER_KEYS`
    if key == "size":
        # Size compares against the revision size when listing revisions,
        # otherwise against the aggregated repo size.
        size_threshold = parse_size(value_raw)
        return lambda repo, revision, _: _compare_numeric(
            revision.size_on_disk if revision is not None else repo.size_on_disk,
            op,
            size_threshold,
        )

    if key in {"modified", "accessed"}:
        # Time filters compare the age (now - timestamp) against a duration like '7d'.
        seconds = parse_duration(value_raw.strip())

        def _time_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], now: float) -> bool:
            # 'accessed' always uses the repo-level timestamp; 'modified' prefers
            # the revision timestamp when one is available.
            timestamp = (
                repo.last_accessed
                if key == "accessed"
                else revision.last_modified
                if revision is not None
                else repo.last_modified
            )
            if timestamp is None:
                # Entries without a timestamp never match a time filter.
                return False
            return _compare_numeric(now - timestamp, op, seconds)

        return _time_filter

    if key == "type":
        # Repo-type match is case-insensitive and equality-only.
        expected = value_raw.lower()

        if op != "=":
            raise ValueError(f"Only '=' is supported for 'type' filters. Got '{op}'.")

        def _type_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], _: float) -> bool:
            return repo.repo_type.lower() == expected

        return _type_filter

    else:  # key == "refs"
        if op != "=":
            raise ValueError(f"Only '=' is supported for 'refs' filters. Got {op}.")

        def _refs_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], _: float) -> bool:
            # In aggregated mode, match against every ref of the repo.
            refs = revision.refs if revision is not None else repo_refs_map.get(repo, frozenset())
            return value_raw.lower() in [ref.lower() for ref in refs]

        return _refs_filter
219
+
220
+
221
+ def _build_cache_export_payload(
222
+ entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
223
+ ) -> List[Dict[str, Any]]:
224
+ """Normalize cache entries into serializable records for JSON/CSV exports."""
225
+ payload: List[Dict[str, Any]] = []
226
+ for repo, revision in entries:
227
+ if include_revisions:
228
+ if revision is None:
229
+ continue
230
+ record: Dict[str, Any] = {
231
+ "repo_id": repo.repo_id,
232
+ "repo_type": repo.repo_type,
233
+ "revision": revision.commit_hash,
234
+ "snapshot_path": str(revision.snapshot_path),
235
+ "size_on_disk": revision.size_on_disk,
236
+ "last_accessed": repo.last_accessed,
237
+ "last_modified": revision.last_modified,
238
+ "refs": sorted(revision.refs),
239
+ }
240
+ else:
241
+ record = {
242
+ "repo_id": repo.repo_id,
243
+ "repo_type": repo.repo_type,
244
+ "size_on_disk": repo.size_on_disk,
245
+ "last_accessed": repo.last_accessed,
246
+ "last_modified": repo.last_modified,
247
+ "refs": sorted(repo_refs_map.get(repo, frozenset())),
248
+ }
249
+ payload.append(record)
250
+ return payload
251
+
252
+
253
def print_cache_entries_table(
    entries: "List[CacheEntry]", *, include_revisions: bool, repo_refs_map: "RepoRefsMap"
) -> None:
    """Render cache entries as a table and show a human-readable summary."""
    if not entries:
        print("No cached revisions found." if include_revisions else "No cached repositories found.")
        return

    if include_revisions:
        headers = ["ID", "REVISION", "SIZE", "LAST_MODIFIED", "REFS"]
        rows = [
            [
                repo.cache_id,
                revision.commit_hash,
                revision.size_on_disk_str.rjust(8),
                revision.last_modified_str,
                " ".join(sorted(revision.refs)),
            ]
            for repo, revision in entries
            if revision is not None
        ]
    else:
        headers = ["ID", "SIZE", "LAST_ACCESSED", "LAST_MODIFIED", "REFS"]
        rows = [
            [
                repo.cache_id,
                repo.size_on_disk_str.rjust(8),
                repo.last_accessed_str or "",
                repo.last_modified_str,
                " ".join(sorted(repo_refs_map.get(repo, frozenset()))),
            ]
            for repo, _ in entries
        ]

    print(tabulate(rows, headers=headers))  # type: ignore[arg-type]

    # Summary: in aggregated mode each repo contributes all of its revisions and
    # its full on-disk size; in revision mode only the displayed revisions count.
    repos = {repo for repo, _ in entries}
    if include_revisions:
        shown_revisions = [revision for _, revision in entries if revision is not None]
        revision_count = len(shown_revisions)
        total_size = sum(revision.size_on_disk for revision in shown_revisions)
    else:
        revision_count = sum(len(repo.revisions) for repo in repos)
        total_size = sum(repo.size_on_disk for repo in repos)

    summary = f"\nFound {len(repos)} repo(s) for a total of {revision_count} revision(s) and {_format_size(total_size)} on disk."
    print(ANSI.bold(summary))
62
301
 
63
302
 
64
- cache_cli = typer_factory(help="Manage local cache directory.")
303
def print_cache_entries_json(
    entries: "List[CacheEntry]", *, include_revisions: bool, repo_refs_map: "RepoRefsMap"
) -> None:
    """Dump cache entries as JSON for scripting or automation."""
    records = _build_cache_export_payload(entries, include_revisions=include_revisions, repo_refs_map=repo_refs_map)
    # Serialize then write, ending with a newline so the output is shell-friendly.
    sys.stdout.write(json.dumps(records, indent=2))
    sys.stdout.write("\n")
310
+
311
+
312
def print_cache_entries_csv(entries: "List[CacheEntry]", *, include_revisions: bool, repo_refs_map: "RepoRefsMap") -> None:
    """Export cache entries as CSV rows with the shared payload format."""
    records = _build_cache_export_payload(entries, include_revisions=include_revisions, repo_refs_map=repo_refs_map)

    if include_revisions:
        columns = [
            "repo_id",
            "repo_type",
            "revision",
            "snapshot_path",
            "size_on_disk",
            "last_accessed",
            "last_modified",
            "refs",
        ]
    else:
        columns = ["repo_id", "repo_type", "size_on_disk", "last_accessed", "last_modified", "refs"]

    writer = csv.writer(sys.stdout)
    # The header row is always emitted, even when there is nothing to export.
    writer.writerow(columns)
    for record in records:
        writer.writerow(
            [
                " ".join(record["refs"]) if column == "refs" else record.get(column, "")
                for column in columns
            ]
        )
359
+
360
+
361
+ def _compare_numeric(left: Optional[float], op: str, right: float) -> bool:
362
+ """Evaluate numeric comparisons for filters."""
363
+ if left is None:
364
+ return False
365
+
366
+ comparisons = {
367
+ "=": left == right,
368
+ "!=": left != right,
369
+ ">": left > right,
370
+ "<": left < right,
371
+ ">=": left >= right,
372
+ "<=": left <= right,
373
+ }
374
+
375
+ if op not in comparisons:
376
+ raise ValueError(f"Unsupported numeric comparison operator: {op}")
377
+
378
+ return comparisons[op]
379
+
380
+
381
def _resolve_deletion_targets(hf_cache_info: HFCacheInfo, targets: list[str]) -> _DeletionResolution:
    """Resolve CLI targets (repo cache ids or 40-char commit hashes) into revisions to delete."""
    repo_index, revision_index = build_cache_index(hf_cache_info)

    picked: dict = defaultdict(set)
    commit_hashes: set[str] = set()
    not_found: list[str] = []

    for original in targets:
        cleaned = original.strip()
        if not cleaned:
            # Blank targets are silently ignored.
            continue
        needle = cleaned.lower()

        if re.fullmatch(r"[0-9a-fA-F]{40}", needle):
            # A full commit hash selects exactly one revision.
            hit = revision_index.get(needle)
            if hit is None:
                not_found.append(original)
            else:
                repo, revision = hit
                picked[repo].add(revision)
                commit_hashes.add(revision.commit_hash)
            continue

        # Anything else is treated as a repo cache id: select every revision.
        repo = repo_index.get(needle)
        if repo is None:
            not_found.append(original)
            continue
        for revision in repo.revisions:
            picked[repo].add(revision)
            commit_hashes.add(revision.commit_hash)

    return _DeletionResolution(
        revisions=frozenset(commit_hashes),
        selected={repo: frozenset(revs) for repo, revs in picked.items()},
        missing=tuple(not_found),
    )
420
+
421
+
422
+ #### Cache CLI commands
423
+
424
+
425
@cache_cli.command()
def ls(
    cache_dir: Annotated[
        Optional[str],
        typer.Option(
            help="Cache directory to scan (defaults to Hugging Face cache).",
        ),
    ] = None,
    revisions: Annotated[
        bool,
        typer.Option(
            help="Include revisions in the output instead of aggregated repositories.",
        ),
    ] = False,
    filter: Annotated[
        Optional[list[str]],
        typer.Option(
            "-f",
            "--filter",
            help="Filter entries (e.g. 'size>1GB', 'type=model', 'accessed>7d'). Can be used multiple times.",
        ),
    ] = None,
    format: Annotated[
        OutputFormat,
        typer.Option(
            help="Output format.",
        ),
    ] = OutputFormat.table,
    quiet: Annotated[
        bool,
        typer.Option(
            "-q",
            "--quiet",
            help="Print only IDs (repo IDs or revision hashes).",
        ),
    ] = False,
) -> None:
    """List cached repositories or revisions."""
    try:
        hf_cache_info = scan_cache_dir(cache_dir)
    except CacheNotFound as exc:
        # Missing cache directory is a user-visible error, not a crash.
        print(f"Cache directory not found: {str(exc.cache_dir)}")
        raise typer.Exit(code=1)

    filters = filter or []

    entries, repo_refs_map = collect_cache_entries(hf_cache_info, include_revisions=revisions)
    try:
        # Compile every --filter expression up front so a bad one fails fast.
        filter_fns = [compile_cache_filter(expr, repo_refs_map) for expr in filters]
    except ValueError as exc:
        raise typer.BadParameter(str(exc)) from exc

    # Capture a single reference time so all time-based filters agree.
    now = time.time()
    # Filters are AND-composed: each pass keeps only matching entries.
    for fn in filter_fns:
        entries = [entry for entry in entries if fn(entry[0], entry[1], now)]

    if quiet:
        # Quiet mode: one ID per line, no table/summary.
        for repo, revision in entries:
            print(revision.commit_hash if revision is not None else repo.cache_id)
        return

    formatters = {
        OutputFormat.table: print_cache_entries_table,
        OutputFormat.json: print_cache_entries_json,
        OutputFormat.csv: print_cache_entries_csv,
    }
    return formatters[format](entries, include_revisions=revisions, repo_refs_map=repo_refs_map)
106
492
 
107
- @cache_cli.command("delete", help="Delete revisions from the cache directory")
108
- def cache_delete(
109
- dir: Annotated[
493
+
494
@cache_cli.command()
def rm(
    targets: Annotated[
        list[str],
        typer.Argument(
            help="One or more repo IDs (e.g. model/bert-base-uncased) or revision hashes to delete.",
        ),
    ],
    cache_dir: Annotated[
        Optional[str],
        typer.Option(
            help="Cache directory to scan (defaults to Hugging Face cache).",
        ),
    ] = None,
    yes: Annotated[
        bool,
        typer.Option(
            "-y",
            "--yes",
            help="Skip confirmation prompt.",
        ),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            help="Preview deletions without removing anything.",
        ),
    ] = False,
) -> None:
    """Remove cached repositories or revisions."""
    try:
        hf_cache_info = scan_cache_dir(cache_dir)
    except CacheNotFound as exc:
        print(f"Cache directory not found: {str(exc.cache_dir)}")
        raise typer.Exit(code=1)

    # Map the raw CLI targets onto actual cached repos/revisions.
    resolution = _resolve_deletion_targets(hf_cache_info, targets)

    # Unknown targets are reported but do not abort the command.
    if resolution.missing:
        print("Could not find the following targets in the cache:")
        for entry in resolution.missing:
            print(f" - {entry}")

    if len(resolution.revisions) == 0:
        print("Nothing to delete.")
        raise typer.Exit(code=0)

    strategy = hf_cache_info.delete_revisions(*sorted(resolution.revisions))
    counts = summarize_deletions(resolution.selected)

    # Build "N repo(s) and M revision(s)" phrasing; fall back to the total when
    # neither full repos nor partial revisions were counted.
    summary_parts: list[str] = []
    if counts.repo_count:
        summary_parts.append(f"{counts.repo_count} repo(s)")
    if counts.partial_revision_count:
        summary_parts.append(f"{counts.partial_revision_count} revision(s)")
    if not summary_parts:
        summary_parts.append(f"{counts.total_revision_count} revision(s)")

    summary_text = " and ".join(summary_parts)
    print(f"About to delete {summary_text} totalling {strategy.expected_freed_size_str}.")
    print_cache_selected_revisions(resolution.selected)

    if dry_run:
        print("Dry run: no files were deleted.")
        return

    if not yes and not typer.confirm("Proceed with deletion?", default=False):
        print("Deletion cancelled.")
        return

    strategy.execute()
    # NOTE(review): recomputed from the same frozen selection, so this is
    # identical to the `counts` computed above.
    counts = summarize_deletions(resolution.selected)
    print(
        f"Deleted {counts.repo_count} repo(s) and {counts.total_revision_count} revision(s); freed {strategy.expected_freed_size_str}."
    )
264
- sorted_repos = sorted(repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
265
- for repo in sorted_repos:
266
- choices.append(
267
- Separator(
268
- f"\n{repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str}, used {repo.last_accessed_str})"
269
- )
270
- )
271
- for revision in sorted(repo.revisions, key=_revision_sorting_order):
272
- choices.append(
273
- Choice(
274
- revision.commit_hash,
275
- name=(
276
- f"{revision.commit_hash[:8]}: {', '.join(sorted(revision.refs)) or '(detached)'} # modified {revision.last_modified_str}"
277
- ),
278
- enabled=revision.commit_hash in preselected,
279
- )
280
- )
281
- return choices
282
569
 
283
570
 
284
- def _manual_review_no_tui(
285
- hf_cache_info: HFCacheInfo, preselected: list[str], sort_by: Optional[str] = None
286
- ) -> list[str]:
287
- fd, tmp_path = mkstemp(suffix=".txt")
288
- os.close(fd)
289
- lines = []
290
- sorted_repos = sorted(hf_cache_info.repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
291
- for repo in sorted_repos:
292
- lines.append(
293
- f"\n# {repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str}, used {repo.last_accessed_str})"
294
- )
295
- for revision in sorted(repo.revisions, key=_revision_sorting_order):
296
- lines.append(
297
- f"{'' if revision.commit_hash in preselected else '#'} {revision.commit_hash} # Refs: {', '.join(sorted(revision.refs)) or '(detached)'} # modified {revision.last_modified_str}"
298
- )
299
- with open(tmp_path, "w") as f:
300
- f.write(_MANUAL_REVIEW_NO_TUI_INSTRUCTIONS)
301
- f.write("\n".join(lines))
302
- instructions = f"""
303
- TUI is disabled. In order to select which revisions you want to delete, please edit
304
- the following file using the text editor of your choice. Instructions for manual
305
- editing are located at the beginning of the file. Edit the file, save it and confirm
306
- to continue.
307
- File to edit: {ANSI.bold(tmp_path)}
308
- """
309
- print("\n".join(line.strip() for line in instructions.strip().split("\n")))
310
- while True:
311
- selected_hashes = _read_manual_review_tmp_file(tmp_path)
312
- if _ask_for_confirmation_no_tui(
313
- _get_expectations_str(hf_cache_info, selected_hashes) + " Continue ?", default=False
314
- ):
315
- break
316
- os.remove(tmp_path)
317
- return sorted(selected_hashes)
318
-
319
-
320
- def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool:
321
- YES = ("y", "yes", "1")
322
- NO = ("n", "no", "0")
323
- DEFAULT = ""
324
- ALL = YES + NO + (DEFAULT,)
325
- full_message = message + (" (Y/n) " if default else " (y/N) ")
326
- while True:
327
- answer = input(full_message).lower()
328
- if answer == DEFAULT:
329
- return default
330
- if answer in YES:
331
- return True
332
- if answer in NO:
333
- return False
334
- print(f"Invalid input. Must be one of {ALL}")
335
-
336
-
337
- def _get_expectations_str(hf_cache_info: HFCacheInfo, selected_hashes: list[str]) -> str:
338
- if _CANCEL_DELETION_STR in selected_hashes:
339
- return "Nothing will be deleted."
340
- strategy = hf_cache_info.delete_revisions(*selected_hashes)
341
- return f"{len(selected_hashes)} revisions selected counting for {strategy.expected_freed_size_str}."
342
-
343
-
344
- def _read_manual_review_tmp_file(tmp_path: str) -> list[str]:
345
- with open(tmp_path) as f:
346
- content = f.read()
347
- lines = [line.strip() for line in content.split("\n")]
348
- selected_lines = [line for line in lines if not line.startswith("#")]
349
- selected_hashes = [line.split("#")[0].strip() for line in selected_lines]
350
- return [hash for hash in selected_hashes if len(hash) > 0]
351
-
352
-
353
- _MANUAL_REVIEW_NO_TUI_INSTRUCTIONS = f"""
354
- # INSTRUCTIONS
355
- # ------------
356
- # This is a temporary file created by running `hf cache delete --disable-tui`. It contains a set of revisions that can be deleted from your local cache directory.
357
- #
358
- # Please manually review the revisions you want to delete:
359
- # - Revision hashes can be commented out with '#'.
360
- # - Only non-commented revisions in this file will be deleted.
361
- # - Revision hashes that are removed from this file are ignored as well.
362
- # - If `{_CANCEL_DELETION_STR}` line is uncommented, the all cache deletion is cancelled and no changes will be applied.
363
- #
364
- # Once you've manually reviewed this file, please confirm deletion in the terminal. This file will be automatically removed once done.
365
- # ------------
571
@cache_cli.command()
def prune(
    cache_dir: Annotated[
        Optional[str],
        typer.Option(
            help="Cache directory to scan (defaults to Hugging Face cache).",
        ),
    ] = None,
    yes: Annotated[
        bool,
        typer.Option(
            "-y",
            "--yes",
            help="Skip confirmation prompt.",
        ),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            help="Preview deletions without removing anything.",
        ),
    ] = False,
) -> None:
    """Remove detached revisions from the cache."""
    try:
        hf_cache_info = scan_cache_dir(cache_dir)
    except CacheNotFound as exc:
        print(f"Cache directory not found: {str(exc.cache_dir)}")
        raise typer.Exit(code=1)

    # Collect every revision with no refs pointing at it (detached revisions).
    selected: dict[CachedRepoInfo, frozenset[CachedRevisionInfo]] = {}
    revisions: set[str] = set()
    for repo in hf_cache_info.repos:
        detached = frozenset(revision for revision in repo.revisions if len(revision.refs) == 0)
        if not detached:
            continue
        selected[repo] = detached
        revisions.update(revision.commit_hash for revision in detached)

    if len(revisions) == 0:
        print("No unreferenced revisions found. Nothing to prune.")
        return

    # Wrap the selection in the same resolution structure `rm` uses.
    resolution = _DeletionResolution(
        revisions=frozenset(revisions),
        selected=selected,
        missing=(),
    )
    strategy = hf_cache_info.delete_revisions(*sorted(resolution.revisions))
    counts = summarize_deletions(selected)

    print(
        f"About to delete {counts.total_revision_count} unreferenced revision(s) ({strategy.expected_freed_size_str} total)."
    )
    print_cache_selected_revisions(selected)

    if dry_run:
        print("Dry run: no files were deleted.")
        return

    if not yes and not typer.confirm("Proceed?"):
        print("Pruning cancelled.")
        return

    strategy.execute()
    print(f"Deleted {counts.total_revision_count} unreferenced revision(s); freed {strategy.expected_freed_size_str}.")