huggingface-hub 0.31.0rc0__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff compares the contents of two publicly released package versions from a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (150)
  1. huggingface_hub/__init__.py +145 -46
  2. huggingface_hub/_commit_api.py +168 -119
  3. huggingface_hub/_commit_scheduler.py +15 -15
  4. huggingface_hub/_inference_endpoints.py +15 -12
  5. huggingface_hub/_jobs_api.py +301 -0
  6. huggingface_hub/_local_folder.py +18 -3
  7. huggingface_hub/_login.py +31 -63
  8. huggingface_hub/_oauth.py +460 -0
  9. huggingface_hub/_snapshot_download.py +239 -80
  10. huggingface_hub/_space_api.py +5 -5
  11. huggingface_hub/_tensorboard_logger.py +15 -19
  12. huggingface_hub/_upload_large_folder.py +172 -76
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +13 -25
  15. huggingface_hub/{commands → cli}/__init__.py +1 -15
  16. huggingface_hub/cli/_cli_utils.py +173 -0
  17. huggingface_hub/cli/auth.py +147 -0
  18. huggingface_hub/cli/cache.py +841 -0
  19. huggingface_hub/cli/download.py +189 -0
  20. huggingface_hub/cli/hf.py +60 -0
  21. huggingface_hub/cli/inference_endpoints.py +377 -0
  22. huggingface_hub/cli/jobs.py +772 -0
  23. huggingface_hub/cli/lfs.py +175 -0
  24. huggingface_hub/cli/repo.py +315 -0
  25. huggingface_hub/cli/repo_files.py +94 -0
  26. huggingface_hub/{commands/env.py → cli/system.py} +10 -13
  27. huggingface_hub/cli/upload.py +294 -0
  28. huggingface_hub/cli/upload_large_folder.py +117 -0
  29. huggingface_hub/community.py +20 -12
  30. huggingface_hub/constants.py +38 -53
  31. huggingface_hub/dataclasses.py +609 -0
  32. huggingface_hub/errors.py +80 -30
  33. huggingface_hub/fastai_utils.py +30 -41
  34. huggingface_hub/file_download.py +435 -351
  35. huggingface_hub/hf_api.py +2050 -1124
  36. huggingface_hub/hf_file_system.py +269 -152
  37. huggingface_hub/hub_mixin.py +43 -63
  38. huggingface_hub/inference/_client.py +347 -434
  39. huggingface_hub/inference/_common.py +133 -121
  40. huggingface_hub/inference/_generated/_async_client.py +397 -541
  41. huggingface_hub/inference/_generated/types/__init__.py +5 -1
  42. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  43. huggingface_hub/inference/_generated/types/base.py +10 -7
  44. huggingface_hub/inference/_generated/types/chat_completion.py +59 -23
  45. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  46. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  47. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  48. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  49. huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
  50. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  51. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  52. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  53. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
  54. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  55. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  56. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  57. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  58. huggingface_hub/inference/_generated/types/translation.py +2 -2
  59. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  60. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  61. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  62. huggingface_hub/inference/_mcp/__init__.py +0 -0
  63. huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
  64. huggingface_hub/inference/_mcp/agent.py +100 -0
  65. huggingface_hub/inference/_mcp/cli.py +247 -0
  66. huggingface_hub/inference/_mcp/constants.py +81 -0
  67. huggingface_hub/inference/_mcp/mcp_client.py +395 -0
  68. huggingface_hub/inference/_mcp/types.py +45 -0
  69. huggingface_hub/inference/_mcp/utils.py +128 -0
  70. huggingface_hub/inference/_providers/__init__.py +82 -7
  71. huggingface_hub/inference/_providers/_common.py +129 -27
  72. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  73. huggingface_hub/inference/_providers/cerebras.py +1 -1
  74. huggingface_hub/inference/_providers/clarifai.py +13 -0
  75. huggingface_hub/inference/_providers/cohere.py +20 -3
  76. huggingface_hub/inference/_providers/fal_ai.py +183 -56
  77. huggingface_hub/inference/_providers/featherless_ai.py +38 -0
  78. huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
  79. huggingface_hub/inference/_providers/groq.py +9 -0
  80. huggingface_hub/inference/_providers/hf_inference.py +69 -30
  81. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  82. huggingface_hub/inference/_providers/nebius.py +33 -5
  83. huggingface_hub/inference/_providers/novita.py +5 -5
  84. huggingface_hub/inference/_providers/nscale.py +44 -0
  85. huggingface_hub/inference/_providers/openai.py +3 -1
  86. huggingface_hub/inference/_providers/publicai.py +6 -0
  87. huggingface_hub/inference/_providers/replicate.py +31 -13
  88. huggingface_hub/inference/_providers/sambanova.py +18 -4
  89. huggingface_hub/inference/_providers/scaleway.py +28 -0
  90. huggingface_hub/inference/_providers/together.py +20 -5
  91. huggingface_hub/inference/_providers/wavespeed.py +138 -0
  92. huggingface_hub/inference/_providers/zai_org.py +17 -0
  93. huggingface_hub/lfs.py +33 -100
  94. huggingface_hub/repocard.py +34 -38
  95. huggingface_hub/repocard_data.py +57 -57
  96. huggingface_hub/serialization/__init__.py +0 -1
  97. huggingface_hub/serialization/_base.py +12 -15
  98. huggingface_hub/serialization/_dduf.py +8 -8
  99. huggingface_hub/serialization/_torch.py +69 -69
  100. huggingface_hub/utils/__init__.py +19 -8
  101. huggingface_hub/utils/_auth.py +7 -7
  102. huggingface_hub/utils/_cache_manager.py +92 -147
  103. huggingface_hub/utils/_chunk_utils.py +2 -3
  104. huggingface_hub/utils/_deprecation.py +1 -1
  105. huggingface_hub/utils/_dotenv.py +55 -0
  106. huggingface_hub/utils/_experimental.py +7 -5
  107. huggingface_hub/utils/_fixes.py +0 -10
  108. huggingface_hub/utils/_git_credential.py +5 -5
  109. huggingface_hub/utils/_headers.py +8 -30
  110. huggingface_hub/utils/_http.py +398 -239
  111. huggingface_hub/utils/_pagination.py +4 -4
  112. huggingface_hub/utils/_parsing.py +98 -0
  113. huggingface_hub/utils/_paths.py +5 -5
  114. huggingface_hub/utils/_runtime.py +61 -24
  115. huggingface_hub/utils/_safetensors.py +21 -21
  116. huggingface_hub/utils/_subprocess.py +9 -9
  117. huggingface_hub/utils/_telemetry.py +4 -4
  118. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
  119. huggingface_hub/utils/_typing.py +25 -5
  120. huggingface_hub/utils/_validators.py +55 -74
  121. huggingface_hub/utils/_verification.py +167 -0
  122. huggingface_hub/utils/_xet.py +64 -17
  123. huggingface_hub/utils/_xet_progress_reporting.py +162 -0
  124. huggingface_hub/utils/insecure_hashlib.py +3 -5
  125. huggingface_hub/utils/logging.py +8 -11
  126. huggingface_hub/utils/tqdm.py +5 -4
  127. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -85
  128. huggingface_hub-1.1.3.dist-info/RECORD +155 -0
  129. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
  130. huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
  131. huggingface_hub/commands/delete_cache.py +0 -474
  132. huggingface_hub/commands/download.py +0 -200
  133. huggingface_hub/commands/huggingface_cli.py +0 -61
  134. huggingface_hub/commands/lfs.py +0 -200
  135. huggingface_hub/commands/repo_files.py +0 -128
  136. huggingface_hub/commands/scan_cache.py +0 -181
  137. huggingface_hub/commands/tag.py +0 -159
  138. huggingface_hub/commands/upload.py +0 -314
  139. huggingface_hub/commands/upload_large_folder.py +0 -129
  140. huggingface_hub/commands/user.py +0 -304
  141. huggingface_hub/commands/version.py +0 -37
  142. huggingface_hub/inference_api.py +0 -217
  143. huggingface_hub/keras_mixin.py +0 -500
  144. huggingface_hub/repository.py +0 -1477
  145. huggingface_hub/serialization/_tensorflow.py +0 -95
  146. huggingface_hub/utils/_hf_folder.py +0 -68
  147. huggingface_hub-0.31.0rc0.dist-info/RECORD +0 -135
  148. huggingface_hub-0.31.0rc0.dist-info/entry_points.txt +0 -6
  149. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
  150. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
+++ huggingface_hub/cli/cache.py (new file)
@@ -0,0 +1,841 @@
+ # coding=utf-8
+ # Copyright 2025-present, the HuggingFace Inc. team.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains the 'hf cache' command group with cache management subcommands."""
+
+ import csv
+ import json
+ import re
+ import sys
+ import time
+ from collections import defaultdict
+ from dataclasses import dataclass
+ from enum import Enum
+ from typing import Annotated, Any, Callable, Dict, List, Mapping, Optional, Tuple
+
+ import typer
+
+ from ..utils import (
+     ANSI,
+     CachedRepoInfo,
+     CachedRevisionInfo,
+     CacheNotFound,
+     HFCacheInfo,
+     _format_size,
+     scan_cache_dir,
+     tabulate,
+ )
+ from ..utils._parsing import parse_duration, parse_size
+ from ._cli_utils import RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api, typer_factory
+
+
+ cache_cli = typer_factory(help="Manage local cache directory.")
+
+
+ #### Cache helper utilities
+
+
+ class OutputFormat(str, Enum):
+     table = "table"
+     json = "json"
+     csv = "csv"
+
+
+ @dataclass(frozen=True)
+ class _DeletionResolution:
+     revisions: frozenset[str]
+     selected: dict[CachedRepoInfo, frozenset[CachedRevisionInfo]]
+     missing: tuple[str, ...]
+
+
+ _FILTER_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)\s*(?P<op>==|!=|>=|<=|>|<|=)\s*(?P<value>.+)$")
+ _ALLOWED_OPERATORS = {"=", "!=", ">", "<", ">=", "<="}
+ _FILTER_KEYS = {"accessed", "modified", "refs", "size", "type"}
+ _SORT_KEYS = {"accessed", "modified", "name", "size"}
+ _SORT_PATTERN = re.compile(r"^(?P<key>[a-zA-Z_]+)(?::(?P<order>asc|desc))?$")
+ _SORT_DEFAULT_ORDER = {
+     # Default ordering: accessed/modified/size are descending (newest/biggest first), name is ascending
+     "accessed": "desc",
+     "modified": "desc",
+     "size": "desc",
+     "name": "asc",
+ }
+
+
+ # Dynamically generate SortOptions enum from _SORT_KEYS
+ _sort_options_dict = {}
+ for key in sorted(_SORT_KEYS):
+     _sort_options_dict[key] = key
+     _sort_options_dict[f"{key}_asc"] = f"{key}:asc"
+     _sort_options_dict[f"{key}_desc"] = f"{key}:desc"
+
+ SortOptions = Enum("SortOptions", _sort_options_dict, type=str, module=__name__)  # type: ignore
+
+
+ @dataclass(frozen=True)
+ class CacheDeletionCounts:
+     """Simple counters summarizing cache deletions for CLI messaging."""
+
+     repo_count: int
+     partial_revision_count: int
+     total_revision_count: int
+
+
+ CacheEntry = Tuple[CachedRepoInfo, Optional[CachedRevisionInfo]]
+ RepoRefsMap = Dict[CachedRepoInfo, frozenset[str]]
+
+
+ def summarize_deletions(
+     selected_by_repo: Mapping[CachedRepoInfo, frozenset[CachedRevisionInfo]],
+ ) -> CacheDeletionCounts:
+     """Summarize deletions across repositories."""
+     repo_count = 0
+     total_revisions = 0
+     revisions_in_full_repos = 0
+
+     for repo, revisions in selected_by_repo.items():
+         total_revisions += len(revisions)
+         if len(revisions) == len(repo.revisions):
+             repo_count += 1
+             revisions_in_full_repos += len(revisions)
+
+     partial_revision_count = total_revisions - revisions_in_full_repos
+     return CacheDeletionCounts(repo_count, partial_revision_count, total_revisions)
+
+
+ def print_cache_selected_revisions(selected_by_repo: Mapping[CachedRepoInfo, frozenset[CachedRevisionInfo]]) -> None:
+     """Pretty-print selected cache revisions during confirmation prompts."""
+     for repo in sorted(selected_by_repo.keys(), key=lambda repo: (repo.repo_type, repo.repo_id.lower())):
+         repo_key = f"{repo.repo_type}/{repo.repo_id}"
+         revisions = sorted(selected_by_repo[repo], key=lambda rev: rev.commit_hash)
+         if len(revisions) == len(repo.revisions):
+             print(f" - {repo_key} (entire repo)")
+             continue
+
+         print(f" - {repo_key}:")
+         for revision in revisions:
+             refs = " ".join(sorted(revision.refs)) or "(detached)"
+             print(f" {revision.commit_hash} [{refs}] {revision.size_on_disk_str}")
+
+
+ def build_cache_index(
+     hf_cache_info: HFCacheInfo,
+ ) -> Tuple[
+     Dict[str, CachedRepoInfo],
+     Dict[str, Tuple[CachedRepoInfo, CachedRevisionInfo]],
+ ]:
+     """Create lookup tables so CLI commands can resolve repo ids and revisions quickly."""
+     repo_lookup: dict[str, CachedRepoInfo] = {}
+     revision_lookup: dict[str, tuple[CachedRepoInfo, CachedRevisionInfo]] = {}
+     for repo in hf_cache_info.repos:
+         repo_key = repo.cache_id.lower()
+         repo_lookup[repo_key] = repo
+         for revision in repo.revisions:
+             revision_lookup[revision.commit_hash.lower()] = (repo, revision)
+     return repo_lookup, revision_lookup
+
+
+ def collect_cache_entries(
+     hf_cache_info: HFCacheInfo, *, include_revisions: bool
+ ) -> Tuple[List[CacheEntry], RepoRefsMap]:
+     """Flatten cache metadata into rows consumed by `hf cache ls`."""
+     entries: List[CacheEntry] = []
+     repo_refs_map: RepoRefsMap = {}
+     sorted_repos = sorted(hf_cache_info.repos, key=lambda repo: (repo.repo_type, repo.repo_id.lower()))
+     for repo in sorted_repos:
+         repo_refs_map[repo] = frozenset({ref for revision in repo.revisions for ref in revision.refs})
+         if include_revisions:
+             for revision in sorted(repo.revisions, key=lambda rev: rev.commit_hash):
+                 entries.append((repo, revision))
+         else:
+             entries.append((repo, None))
+     if include_revisions:
+         entries.sort(
+             key=lambda entry: (
+                 entry[0].cache_id,
+                 entry[1].commit_hash if entry[1] is not None else "",
+             )
+         )
+     else:
+         entries.sort(key=lambda entry: entry[0].cache_id)
+     return entries, repo_refs_map
+
+
+ def compile_cache_filter(
+     expr: str, repo_refs_map: RepoRefsMap
+ ) -> Callable[[CachedRepoInfo, Optional[CachedRevisionInfo], float], bool]:
+     """Convert a `hf cache ls` filter expression into the yes/no test we apply to each cache entry before displaying it."""
+     match = _FILTER_PATTERN.match(expr.strip())
+     if not match:
+         raise ValueError(f"Invalid filter expression: '{expr}'.")
+
+     key = match.group("key").lower()
+     op = match.group("op")
+     value_raw = match.group("value").strip()
+
+     if op not in _ALLOWED_OPERATORS:
+         raise ValueError(f"Unsupported operator '{op}' in filter '{expr}'. Must be one of {list(_ALLOWED_OPERATORS)}.")
+
+     if key not in _FILTER_KEYS:
+         raise ValueError(f"Unsupported filter key '{key}' in '{expr}'. Must be one of {list(_FILTER_KEYS)}.")
+     # at this point we know that key is in `_FILTER_KEYS`
+     if key == "size":
+         size_threshold = parse_size(value_raw)
+         return lambda repo, revision, _: _compare_numeric(
+             revision.size_on_disk if revision is not None else repo.size_on_disk,
+             op,
+             size_threshold,
+         )
+
+     if key in {"modified", "accessed"}:
+         seconds = parse_duration(value_raw.strip())
+
+         def _time_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], now: float) -> bool:
+             timestamp = (
+                 repo.last_accessed
+                 if key == "accessed"
+                 else revision.last_modified
+                 if revision is not None
+                 else repo.last_modified
+             )
+             if timestamp is None:
+                 return False
+             return _compare_numeric(now - timestamp, op, seconds)
+
+         return _time_filter
+
+     if key == "type":
+         expected = value_raw.lower()
+
+         if op != "=":
+             raise ValueError(f"Only '=' is supported for 'type' filters. Got '{op}'.")
+
+         def _type_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], _: float) -> bool:
+             return repo.repo_type.lower() == expected
+
+         return _type_filter
+
+     else:  # key == "refs"
+         if op != "=":
+             raise ValueError(f"Only '=' is supported for 'refs' filters. Got '{op}'.")
+
+         def _refs_filter(repo: CachedRepoInfo, revision: Optional[CachedRevisionInfo], _: float) -> bool:
+             refs = revision.refs if revision is not None else repo_refs_map.get(repo, frozenset())
+             return value_raw.lower() in [ref.lower() for ref in refs]
+
+         return _refs_filter
+
+
+ def _build_cache_export_payload(
+     entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
+ ) -> List[Dict[str, Any]]:
+     """Normalize cache entries into serializable records for JSON/CSV exports."""
+     payload: List[Dict[str, Any]] = []
+     for repo, revision in entries:
+         if include_revisions:
+             if revision is None:
+                 continue
+             record: Dict[str, Any] = {
+                 "repo_id": repo.repo_id,
+                 "repo_type": repo.repo_type,
+                 "revision": revision.commit_hash,
+                 "snapshot_path": str(revision.snapshot_path),
+                 "size_on_disk": revision.size_on_disk,
+                 "last_accessed": repo.last_accessed,
+                 "last_modified": revision.last_modified,
+                 "refs": sorted(revision.refs),
+             }
+         else:
+             record = {
+                 "repo_id": repo.repo_id,
+                 "repo_type": repo.repo_type,
+                 "size_on_disk": repo.size_on_disk,
+                 "last_accessed": repo.last_accessed,
+                 "last_modified": repo.last_modified,
+                 "refs": sorted(repo_refs_map.get(repo, frozenset())),
+             }
+         payload.append(record)
+     return payload
+
+
+ def print_cache_entries_table(
+     entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
+ ) -> None:
+     """Render cache entries as a table and show a human-readable summary."""
+     if not entries:
+         message = "No cached revisions found." if include_revisions else "No cached repositories found."
+         print(message)
+         return
+     table_rows: List[List[str]]
+     if include_revisions:
+         headers = ["ID", "REVISION", "SIZE", "LAST_MODIFIED", "REFS"]
+         table_rows = [
+             [
+                 repo.cache_id,
+                 revision.commit_hash,
+                 revision.size_on_disk_str.rjust(8),
+                 revision.last_modified_str,
+                 " ".join(sorted(revision.refs)),
+             ]
+             for repo, revision in entries
+             if revision is not None
+         ]
+     else:
+         headers = ["ID", "SIZE", "LAST_ACCESSED", "LAST_MODIFIED", "REFS"]
+         table_rows = [
+             [
+                 repo.cache_id,
+                 repo.size_on_disk_str.rjust(8),
+                 repo.last_accessed_str or "",
+                 repo.last_modified_str,
+                 " ".join(sorted(repo_refs_map.get(repo, frozenset()))),
+             ]
+             for repo, _ in entries
+         ]
+
+     print(tabulate(table_rows, headers=headers))  # type: ignore[arg-type]
+
+     unique_repos = {repo for repo, _ in entries}
+     repo_count = len(unique_repos)
+     if include_revisions:
+         revision_count = sum(1 for _, revision in entries if revision is not None)
+         total_size = sum(revision.size_on_disk for _, revision in entries if revision is not None)
+     else:
+         revision_count = sum(len(repo.revisions) for repo in unique_repos)
+         total_size = sum(repo.size_on_disk for repo in unique_repos)
+
+     summary = f"\nFound {repo_count} repo(s) for a total of {revision_count} revision(s) and {_format_size(total_size)} on disk."
+     print(ANSI.bold(summary))
+
+
+ def print_cache_entries_json(
+     entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap
+ ) -> None:
+     """Dump cache entries as JSON for scripting or automation."""
+     payload = _build_cache_export_payload(entries, include_revisions=include_revisions, repo_refs_map=repo_refs_map)
+     json.dump(payload, sys.stdout, indent=2)
+     sys.stdout.write("\n")
+
+
+ def print_cache_entries_csv(entries: List[CacheEntry], *, include_revisions: bool, repo_refs_map: RepoRefsMap) -> None:
+     """Export cache entries as CSV rows with the shared payload format."""
+     records = _build_cache_export_payload(entries, include_revisions=include_revisions, repo_refs_map=repo_refs_map)
+     writer = csv.writer(sys.stdout)
+
+     if include_revisions:
+         headers = [
+             "repo_id",
+             "repo_type",
+             "revision",
+             "snapshot_path",
+             "size_on_disk",
+             "last_accessed",
+             "last_modified",
+             "refs",
+         ]
+     else:
+         headers = ["repo_id", "repo_type", "size_on_disk", "last_accessed", "last_modified", "refs"]
+
+     writer.writerow(headers)
+
+     if not records:
+         return
+
+     for record in records:
+         refs = record["refs"]
+         if include_revisions:
+             row = [
+                 record.get("repo_id", ""),
+                 record.get("repo_type", ""),
+                 record.get("revision", ""),
+                 record.get("snapshot_path", ""),
+                 record.get("size_on_disk"),
+                 record.get("last_accessed"),
+                 record.get("last_modified"),
+                 " ".join(refs) if refs else "",
+             ]
+         else:
+             row = [
+                 record.get("repo_id", ""),
+                 record.get("repo_type", ""),
+                 record.get("size_on_disk"),
+                 record.get("last_accessed"),
+                 record.get("last_modified"),
+                 " ".join(refs) if refs else "",
+             ]
+         writer.writerow(row)
+
+
+ def _compare_numeric(left: Optional[float], op: str, right: float) -> bool:
+     """Evaluate numeric comparisons for filters."""
+     if left is None:
+         return False
+
+     comparisons = {
+         "=": left == right,
+         "!=": left != right,
+         ">": left > right,
+         "<": left < right,
+         ">=": left >= right,
+         "<=": left <= right,
+     }
+
+     if op not in comparisons:
+         raise ValueError(f"Unsupported numeric comparison operator: {op}")
+
+     return comparisons[op]
+
+
+ def compile_cache_sort(sort_expr: str) -> tuple[Callable[[CacheEntry], tuple[Any, ...]], bool]:
+     """Convert a `hf cache ls` sort expression into a key function for sorting entries.
+
+     Returns:
+         A tuple of (key_function, reverse_flag) where reverse_flag indicates whether
+         to sort in descending order (True) or ascending order (False).
+     """
+     match = _SORT_PATTERN.match(sort_expr.strip().lower())
+     if not match:
+         raise ValueError(f"Invalid sort expression: '{sort_expr}'. Expected format: 'key' or 'key:asc' or 'key:desc'.")
+
+     key = match.group("key").lower()
+     explicit_order = match.group("order")
+
+     if key not in _SORT_KEYS:
+         raise ValueError(f"Unsupported sort key '{key}' in '{sort_expr}'. Must be one of {list(_SORT_KEYS)}.")
+
+     # Use explicit order if provided, otherwise use default for the key
+     order = explicit_order if explicit_order else _SORT_DEFAULT_ORDER[key]
+     reverse = order == "desc"
+
+     def _sort_key(entry: CacheEntry) -> tuple[Any, ...]:
+         repo, revision = entry
+
+         if key == "name":
+             # Sort by cache_id (repo type/id)
+             value: Any = repo.cache_id.lower()
+             return (value,)
+
+         if key == "size":
+             # Use revision size if available, otherwise repo size
+             value = revision.size_on_disk if revision is not None else repo.size_on_disk
+             return (value,)
+
+         if key == "accessed":
+             # For revisions, accessed is not available per-revision, use repo's last_accessed
+             # For repos, use repo's last_accessed
+             value = repo.last_accessed if repo.last_accessed is not None else 0.0
+             return (value,)
+
+         if key == "modified":
+             # Use revision's last_modified if available, otherwise repo's last_modified
+             if revision is not None:
+                 value = revision.last_modified if revision.last_modified is not None else 0.0
+             else:
+                 value = repo.last_modified if repo.last_modified is not None else 0.0
+             return (value,)
+
+         # Should never reach here due to validation above
+         raise ValueError(f"Unsupported sort key: {key}")
+
+     return _sort_key, reverse
+
+
+ def _resolve_deletion_targets(hf_cache_info: HFCacheInfo, targets: list[str]) -> _DeletionResolution:
+     """Resolve requested targets (repo ids or 40-char revision hashes) into the cached revisions to delete."""
+     repo_lookup, revision_lookup = build_cache_index(hf_cache_info)
+
+     selected: dict[CachedRepoInfo, set[CachedRevisionInfo]] = defaultdict(set)
+     revisions: set[str] = set()
+     missing: list[str] = []
+
+     for raw_target in targets:
+         target = raw_target.strip()
+         if not target:
+             continue
+         lowered = target.lower()
+
+         if re.fullmatch(r"[0-9a-fA-F]{40}", lowered):
+             match = revision_lookup.get(lowered)
+             if match is None:
+                 missing.append(raw_target)
+                 continue
+             repo, revision = match
+             selected[repo].add(revision)
+             revisions.add(revision.commit_hash)
+             continue
+
+         matched_repo = repo_lookup.get(lowered)
+         if matched_repo is None:
+             missing.append(raw_target)
+             continue
+
+         for revision in matched_repo.revisions:
+             selected[matched_repo].add(revision)
+             revisions.add(revision.commit_hash)
+
+     frozen_selected = {repo: frozenset(revs) for repo, revs in selected.items()}
+     return _DeletionResolution(
+         revisions=frozenset(revisions),
+         selected=frozen_selected,
+         missing=tuple(missing),
+     )
+
+
+ #### Cache CLI commands
+
+
+ @cache_cli.command()
+ def ls(
+     cache_dir: Annotated[
+         Optional[str],
+         typer.Option(
+             help="Cache directory to scan (defaults to Hugging Face cache).",
+         ),
+     ] = None,
+     revisions: Annotated[
+         bool,
+         typer.Option(
+             help="Include revisions in the output instead of aggregated repositories.",
+         ),
+     ] = False,
+     filter: Annotated[
+         Optional[list[str]],
+         typer.Option(
+             "-f",
+             "--filter",
+             help="Filter entries (e.g. 'size>1GB', 'type=model', 'accessed>7d'). Can be used multiple times.",
+         ),
+     ] = None,
+     format: Annotated[
+         OutputFormat,
+         typer.Option(
+             help="Output format.",
+         ),
+     ] = OutputFormat.table,
+     quiet: Annotated[
+         bool,
+         typer.Option(
+             "-q",
+             "--quiet",
+             help="Print only IDs (repo IDs or revision hashes).",
+         ),
+     ] = False,
+     sort: Annotated[
+         Optional[SortOptions],
+         typer.Option(
+             help="Sort entries by key. Supported keys: 'accessed', 'modified', 'name', 'size'. "
+             "Append ':asc' or ':desc' to explicitly set the order (e.g., 'modified:asc'). "
+             "Defaults: 'accessed', 'modified', 'size' default to 'desc' (newest/biggest first); "
+             "'name' defaults to 'asc' (alphabetical).",
+         ),
+     ] = None,
+     limit: Annotated[
+         Optional[int],
+         typer.Option(
+             help="Limit the number of results returned. Returns only the top N entries after sorting.",
+         ),
+     ] = None,
+ ) -> None:
+     """List cached repositories or revisions."""
+     try:
+         hf_cache_info = scan_cache_dir(cache_dir)
+     except CacheNotFound as exc:
+         print(f"Cache directory not found: {str(exc.cache_dir)}")
+         raise typer.Exit(code=1) from exc
+
+     filters = filter or []
+
+     entries, repo_refs_map = collect_cache_entries(hf_cache_info, include_revisions=revisions)
+     try:
+         filter_fns = [compile_cache_filter(expr, repo_refs_map) for expr in filters]
+     except ValueError as exc:
+         raise typer.BadParameter(str(exc)) from exc
+
+     now = time.time()
+     for fn in filter_fns:
+         entries = [entry for entry in entries if fn(entry[0], entry[1], now)]
+
+     # Apply sorting if requested
+     if sort:
+         try:
+             sort_key_fn, reverse = compile_cache_sort(sort.value)
+             entries.sort(key=sort_key_fn, reverse=reverse)
+         except ValueError as exc:
+             raise typer.BadParameter(str(exc)) from exc
+
+     # Apply limit if requested
+     if limit is not None:
+         if limit < 0:
+             raise typer.BadParameter(f"Limit must be a non-negative integer, got {limit}.")
+         entries = entries[:limit]
+
+     if quiet:
+         for repo, revision in entries:
+             print(revision.commit_hash if revision is not None else repo.cache_id)
+         return
+
+     formatters = {
+         OutputFormat.table: print_cache_entries_table,
+         OutputFormat.json: print_cache_entries_json,
+         OutputFormat.csv: print_cache_entries_csv,
+     }
+     return formatters[format](entries, include_revisions=revisions, repo_refs_map=repo_refs_map)
+
+
+ @cache_cli.command()
+ def rm(
+     targets: Annotated[
+         list[str],
+         typer.Argument(
+             help="One or more repo IDs (e.g. model/bert-base-uncased) or revision hashes to delete.",
+         ),
+     ],
+     cache_dir: Annotated[
+         Optional[str],
+         typer.Option(
+             help="Cache directory to scan (defaults to Hugging Face cache).",
+         ),
+     ] = None,
+     yes: Annotated[
+         bool,
+         typer.Option(
+             "-y",
+             "--yes",
+             help="Skip confirmation prompt.",
+         ),
+     ] = False,
+     dry_run: Annotated[
+         bool,
+         typer.Option(
+             help="Preview deletions without removing anything.",
+         ),
+     ] = False,
+ ) -> None:
+     """Remove cached repositories or revisions."""
+     try:
+         hf_cache_info = scan_cache_dir(cache_dir)
+     except CacheNotFound as exc:
+         print(f"Cache directory not found: {str(exc.cache_dir)}")
+         raise typer.Exit(code=1) from exc
+
+     resolution = _resolve_deletion_targets(hf_cache_info, targets)
+
+     if resolution.missing:
+         print("Could not find the following targets in the cache:")
+         for entry in resolution.missing:
+             print(f" - {entry}")
+
+     if len(resolution.revisions) == 0:
+         print("Nothing to delete.")
+         raise typer.Exit(code=0)
+
+     strategy = hf_cache_info.delete_revisions(*sorted(resolution.revisions))
+     counts = summarize_deletions(resolution.selected)
+
+     summary_parts: list[str] = []
+     if counts.repo_count:
+         summary_parts.append(f"{counts.repo_count} repo(s)")
+     if counts.partial_revision_count:
+         summary_parts.append(f"{counts.partial_revision_count} revision(s)")
+     if not summary_parts:
+         summary_parts.append(f"{counts.total_revision_count} revision(s)")
+
+     summary_text = " and ".join(summary_parts)
+     print(f"About to delete {summary_text} totalling {strategy.expected_freed_size_str}.")
+     print_cache_selected_revisions(resolution.selected)
+
+     if dry_run:
+         print("Dry run: no files were deleted.")
+         return
+
+     if not yes and not typer.confirm("Proceed with deletion?", default=False):
+         print("Deletion cancelled.")
+         return
+
+     strategy.execute()
+     counts = summarize_deletions(resolution.selected)
+     print(
+         f"Deleted {counts.repo_count} repo(s) and {counts.total_revision_count} revision(s); freed {strategy.expected_freed_size_str}."
+     )
+
+
+ @cache_cli.command()
+ def prune(
+     cache_dir: Annotated[
+         Optional[str],
+         typer.Option(
+             help="Cache directory to scan (defaults to Hugging Face cache).",
+         ),
+     ] = None,
+     yes: Annotated[
+         bool,
+         typer.Option(
+             "-y",
+             "--yes",
+             help="Skip confirmation prompt.",
+         ),
+     ] = False,
+     dry_run: Annotated[
+         bool,
+         typer.Option(
+             help="Preview deletions without removing anything.",
+         ),
+     ] = False,
+ ) -> None:
+     """Remove detached revisions from the cache."""
+     try:
+         hf_cache_info = scan_cache_dir(cache_dir)
+     except CacheNotFound as exc:
+         print(f"Cache directory not found: {str(exc.cache_dir)}")
+         raise typer.Exit(code=1) from exc
+
+     selected: dict[CachedRepoInfo, frozenset[CachedRevisionInfo]] = {}
+     revisions: set[str] = set()
+     for repo in hf_cache_info.repos:
+         detached = frozenset(revision for revision in repo.revisions if len(revision.refs) == 0)
+         if not detached:
+             continue
+         selected[repo] = detached
+         revisions.update(revision.commit_hash for revision in detached)
+
+     if len(revisions) == 0:
+         print("No unreferenced revisions found. Nothing to prune.")
+         return
+
+     resolution = _DeletionResolution(
+         revisions=frozenset(revisions),
+         selected=selected,
+         missing=(),
+     )
+     strategy = hf_cache_info.delete_revisions(*sorted(resolution.revisions))
+     counts = summarize_deletions(selected)
+
+     print(
+         f"About to delete {counts.total_revision_count} unreferenced revision(s) ({strategy.expected_freed_size_str} total)."
+     )
+     print_cache_selected_revisions(selected)
+
+     if dry_run:
+         print("Dry run: no files were deleted.")
+         return
+
+     if not yes and not typer.confirm("Proceed?"):
+         print("Pruning cancelled.")
+         return
+
+     strategy.execute()
+     print(f"Deleted {counts.total_revision_count} unreferenced revision(s); freed {strategy.expected_freed_size_str}.")
+
+
+ @cache_cli.command()
+ def verify(
+     repo_id: RepoIdArg,
+     repo_type: RepoTypeOpt = RepoTypeOpt.model,
+     revision: RevisionOpt = None,
+     cache_dir: Annotated[
+         Optional[str],
+         typer.Option(
+             help="Cache directory to use when verifying files from cache (defaults to Hugging Face cache).",
+         ),
+     ] = None,
+     local_dir: Annotated[
+         Optional[str],
+         typer.Option(
+             help="If set, verify files under this directory instead of the cache.",
+         ),
+     ] = None,
+     fail_on_missing_files: Annotated[
+         bool,
+         typer.Option(
+             "--fail-on-missing-files",
+             help="Fail if some files exist on the remote but are missing locally.",
+         ),
+     ] = False,
+     fail_on_extra_files: Annotated[
+         bool,
+         typer.Option(
+             "--fail-on-extra-files",
+             help="Fail if some files exist locally but are not present on the remote revision.",
+         ),
+     ] = False,
+     token: TokenOpt = None,
+ ) -> None:
+     """Verify checksums for a single repo revision from cache or a local directory.
+
+     Examples:
+     - Verify main revision in cache: `hf cache verify gpt2`
+     - Verify specific revision: `hf cache verify gpt2 --revision refs/pr/1`
+     - Verify dataset: `hf cache verify karpathy/fineweb-edu-100b-shuffle --repo-type dataset`
+     - Verify local dir: `hf cache verify deepseek-ai/DeepSeek-OCR --local-dir /path/to/repo`
+     """
+
+     if local_dir is not None and cache_dir is not None:
+         print("Cannot pass both --local-dir and --cache-dir. Use one or the other.")
+         raise typer.Exit(code=2)
+
+     api = get_hf_api(token=token)
+
+     result = api.verify_repo_checksums(
+         repo_id=repo_id,
+         repo_type=repo_type.value if hasattr(repo_type, "value") else str(repo_type),
+         revision=revision,
+         local_dir=local_dir,
+         cache_dir=cache_dir,
+         token=token,
+     )
+
+     exit_code = 0
+
+     has_mismatches = bool(result.mismatches)
+     if has_mismatches:
+         print("❌ Checksum verification failed for the following file(s):")
+         for m in result.mismatches:
+             print(f" - {m['path']}: expected {m['expected']} ({m['algorithm']}), got {m['actual']}")
+         exit_code = 1
+
+     if result.missing_paths:
+         if fail_on_missing_files:
+             print("Missing files (present remotely, absent locally):")
+             for p in result.missing_paths:
+                 print(f" - {p}")
+             exit_code = 1
+         else:
+             warning = (
+                 f"{len(result.missing_paths)} remote file(s) are missing locally. "
+                 "Use --fail-on-missing-files for details."
+             )
+             print(f"⚠️ {warning}")
+
+     if result.extra_paths:
+         if fail_on_extra_files:
+             print("Extra files (present locally, absent remotely):")
+             for p in result.extra_paths:
+                 print(f" - {p}")
+             exit_code = 1
+         else:
+             warning = (
+                 f"{len(result.extra_paths)} local file(s) do not exist on the remote repo. "
+                 "Use --fail-on-extra-files for details."
+             )
+             print(f"⚠️ {warning}")
+
+     verified_location = result.verified_path
+
+     if exit_code != 0:
+         print(f"❌ Verification failed for '{repo_id}' ({repo_type.value}) in {verified_location}.")
+         print(f" Revision: {result.revision}")
+         raise typer.Exit(code=exit_code)
+
+     print(f"✅ Verified {result.checked_count} file(s) for '{repo_id}' ({repo_type.value}) in {verified_location}")
+     print(" All checksums match.")
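For readers who script against the library rather than the CLI, the pruning logic above builds only on public cache utilities that already appear in this module's imports (`scan_cache_dir`, `HFCacheInfo.delete_revisions`, and the returned strategy object). A minimal sketch of the equivalent of `hf cache prune` in Python, assuming those helpers remain exported from the top-level `huggingface_hub` package in 1.x:

    from huggingface_hub import scan_cache_dir

    # Scan the default cache and collect revisions with no refs attached,
    # mirroring the detached-revision selection in `prune` above.
    cache_info = scan_cache_dir()
    detached = [
        revision.commit_hash
        for repo in cache_info.repos
        for revision in repo.revisions
        if len(revision.refs) == 0
    ]

    if detached:
        strategy = cache_info.delete_revisions(*sorted(detached))
        print(f"Would free {strategy.expected_freed_size_str}")
        strategy.execute()  # actually delete the detached revisions
    else:
        print("No unreferenced revisions found. Nothing to prune.")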