huggingface-hub 0.29.0rc2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +160 -46
- huggingface_hub/_commit_api.py +277 -71
- huggingface_hub/_commit_scheduler.py +15 -15
- huggingface_hub/_inference_endpoints.py +33 -22
- huggingface_hub/_jobs_api.py +301 -0
- huggingface_hub/_local_folder.py +18 -3
- huggingface_hub/_login.py +31 -63
- huggingface_hub/_oauth.py +460 -0
- huggingface_hub/_snapshot_download.py +241 -81
- huggingface_hub/_space_api.py +18 -10
- huggingface_hub/_tensorboard_logger.py +15 -19
- huggingface_hub/_upload_large_folder.py +196 -76
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +15 -25
- huggingface_hub/{commands → cli}/__init__.py +1 -15
- huggingface_hub/cli/_cli_utils.py +173 -0
- huggingface_hub/cli/auth.py +147 -0
- huggingface_hub/cli/cache.py +841 -0
- huggingface_hub/cli/download.py +189 -0
- huggingface_hub/cli/hf.py +60 -0
- huggingface_hub/cli/inference_endpoints.py +377 -0
- huggingface_hub/cli/jobs.py +772 -0
- huggingface_hub/cli/lfs.py +175 -0
- huggingface_hub/cli/repo.py +315 -0
- huggingface_hub/cli/repo_files.py +94 -0
- huggingface_hub/{commands/env.py → cli/system.py} +10 -13
- huggingface_hub/cli/upload.py +294 -0
- huggingface_hub/cli/upload_large_folder.py +117 -0
- huggingface_hub/community.py +20 -12
- huggingface_hub/constants.py +83 -59
- huggingface_hub/dataclasses.py +609 -0
- huggingface_hub/errors.py +99 -30
- huggingface_hub/fastai_utils.py +30 -41
- huggingface_hub/file_download.py +606 -346
- huggingface_hub/hf_api.py +2445 -1132
- huggingface_hub/hf_file_system.py +269 -152
- huggingface_hub/hub_mixin.py +61 -66
- huggingface_hub/inference/_client.py +501 -630
- huggingface_hub/inference/_common.py +133 -121
- huggingface_hub/inference/_generated/_async_client.py +536 -722
- huggingface_hub/inference/_generated/types/__init__.py +6 -1
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +5 -6
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +77 -31
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +8 -2
- huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +11 -11
- huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/__init__.py +0 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
- huggingface_hub/inference/_mcp/agent.py +100 -0
- huggingface_hub/inference/_mcp/cli.py +247 -0
- huggingface_hub/inference/_mcp/constants.py +81 -0
- huggingface_hub/inference/_mcp/mcp_client.py +395 -0
- huggingface_hub/inference/_mcp/types.py +45 -0
- huggingface_hub/inference/_mcp/utils.py +128 -0
- huggingface_hub/inference/_providers/__init__.py +149 -20
- huggingface_hub/inference/_providers/_common.py +160 -37
- huggingface_hub/inference/_providers/black_forest_labs.py +12 -9
- huggingface_hub/inference/_providers/cerebras.py +6 -0
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/inference/_providers/cohere.py +32 -0
- huggingface_hub/inference/_providers/fal_ai.py +231 -22
- huggingface_hub/inference/_providers/featherless_ai.py +38 -0
- huggingface_hub/inference/_providers/fireworks_ai.py +22 -1
- huggingface_hub/inference/_providers/groq.py +9 -0
- huggingface_hub/inference/_providers/hf_inference.py +143 -33
- huggingface_hub/inference/_providers/hyperbolic.py +9 -5
- huggingface_hub/inference/_providers/nebius.py +47 -5
- huggingface_hub/inference/_providers/novita.py +48 -5
- huggingface_hub/inference/_providers/nscale.py +44 -0
- huggingface_hub/inference/_providers/openai.py +25 -0
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +46 -9
- huggingface_hub/inference/_providers/sambanova.py +37 -1
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/together.py +34 -5
- huggingface_hub/inference/_providers/wavespeed.py +138 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +33 -100
- huggingface_hub/repocard.py +34 -38
- huggingface_hub/repocard_data.py +79 -59
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +12 -15
- huggingface_hub/serialization/_dduf.py +8 -8
- huggingface_hub/serialization/_torch.py +69 -69
- huggingface_hub/utils/__init__.py +27 -8
- huggingface_hub/utils/_auth.py +7 -7
- huggingface_hub/utils/_cache_manager.py +92 -147
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +55 -0
- huggingface_hub/utils/_experimental.py +7 -5
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +5 -5
- huggingface_hub/utils/_headers.py +8 -30
- huggingface_hub/utils/_http.py +399 -237
- huggingface_hub/utils/_pagination.py +6 -6
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +74 -22
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +13 -11
- huggingface_hub/utils/_telemetry.py +4 -4
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +55 -74
- huggingface_hub/utils/_verification.py +167 -0
- huggingface_hub/utils/_xet.py +235 -0
- huggingface_hub/utils/_xet_progress_reporting.py +162 -0
- huggingface_hub/utils/insecure_hashlib.py +3 -5
- huggingface_hub/utils/logging.py +8 -11
- huggingface_hub/utils/tqdm.py +33 -4
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -82
- huggingface_hub-1.1.3.dist-info/RECORD +155 -0
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
- huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
- huggingface_hub/commands/delete_cache.py +0 -428
- huggingface_hub/commands/download.py +0 -200
- huggingface_hub/commands/huggingface_cli.py +0 -61
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo_files.py +0 -128
- huggingface_hub/commands/scan_cache.py +0 -181
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -299
- huggingface_hub/commands/upload_large_folder.py +0 -129
- huggingface_hub/commands/user.py +0 -304
- huggingface_hub/commands/version.py +0 -37
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.29.0rc2.dist-info/RECORD +0 -131
- huggingface_hub-0.29.0rc2.dist-info/entry_points.txt +0 -6
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
|
@@ -16,17 +16,17 @@
|
|
|
16
16
|
|
|
17
17
|
import os
|
|
18
18
|
import shutil
|
|
19
|
-
import time
|
|
20
19
|
from collections import defaultdict
|
|
21
20
|
from dataclasses import dataclass
|
|
22
21
|
from pathlib import Path
|
|
23
|
-
from typing import
|
|
22
|
+
from typing import Literal, Optional, Union
|
|
24
23
|
|
|
25
24
|
from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
|
|
26
25
|
|
|
27
|
-
from ..commands._cli_utils import tabulate
|
|
28
26
|
from ..constants import HF_HUB_CACHE
|
|
29
27
|
from . import logging
|
|
28
|
+
from ._parsing import format_timesince
|
|
29
|
+
from ._terminal import tabulate
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
logger = logging.get_logger(__name__)
|
|
@@ -57,13 +57,10 @@ class CachedFileInfo:
|
|
|
57
57
|
blob_last_modified (`float`):
|
|
58
58
|
Timestamp of the last time the blob file has been modified/created.
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
for more details.
|
|
65
|
-
|
|
66
|
-
</Tip>
|
|
60
|
+
> [!WARNING]
|
|
61
|
+
> `blob_last_accessed` and `blob_last_modified` reliability can depend on the OS you
|
|
62
|
+
> are using. See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
|
|
63
|
+
> for more details.
|
|
67
64
|
"""
|
|
68
65
|
|
|
69
66
|
file_name: str
|
|
@@ -82,7 +79,7 @@ class CachedFileInfo:
|
|
|
82
79
|
|
|
83
80
|
Example: "2 weeks ago".
|
|
84
81
|
"""
|
|
85
|
-
return
|
|
82
|
+
return format_timesince(self.blob_last_accessed)
|
|
86
83
|
|
|
87
84
|
@property
|
|
88
85
|
def blob_last_modified_str(self) -> str:
|
|
@@ -92,7 +89,7 @@ class CachedFileInfo:
|
|
|
92
89
|
|
|
93
90
|
Example: "2 weeks ago".
|
|
94
91
|
"""
|
|
95
|
-
return
|
|
92
|
+
return format_timesince(self.blob_last_modified)
|
|
96
93
|
|
|
97
94
|
@property
|
|
98
95
|
def size_on_disk_str(self) -> str:
|
|
@@ -119,9 +116,9 @@ class CachedRevisionInfo:
|
|
|
119
116
|
snapshot_path (`Path`):
|
|
120
117
|
Path to the revision directory in the `snapshots` folder. It contains the
|
|
121
118
|
exact tree structure as the repo on the Hub.
|
|
122
|
-
files: (`
|
|
119
|
+
files: (`frozenset[CachedFileInfo]`):
|
|
123
120
|
Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
|
|
124
|
-
refs (`
|
|
121
|
+
refs (`frozenset[str]`):
|
|
125
122
|
Set of `refs` pointing to this revision. If the revision has no `refs`, it
|
|
126
123
|
is considered detached.
|
|
127
124
|
Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
|
|
@@ -130,27 +127,21 @@ class CachedRevisionInfo:
|
|
|
130
127
|
last_modified (`float`):
|
|
131
128
|
Timestamp of the last time the revision has been created/modified.
|
|
132
129
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
are shared across revisions.
|
|
137
|
-
|
|
138
|
-
</Tip>
|
|
130
|
+
> [!WARNING]
|
|
131
|
+
> `last_accessed` cannot be determined correctly on a single revision as blob files
|
|
132
|
+
> are shared across revisions.
|
|
139
133
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
size of folders and symlinks.
|
|
145
|
-
|
|
146
|
-
</Tip>
|
|
134
|
+
> [!WARNING]
|
|
135
|
+
> `size_on_disk` is not necessarily the sum of all file sizes because of possible
|
|
136
|
+
> duplicated files. Besides, only blobs are taken into account, not the (negligible)
|
|
137
|
+
> size of folders and symlinks.
|
|
147
138
|
"""
|
|
148
139
|
|
|
149
140
|
commit_hash: str
|
|
150
141
|
snapshot_path: Path
|
|
151
142
|
size_on_disk: int
|
|
152
|
-
files:
|
|
153
|
-
refs:
|
|
143
|
+
files: frozenset[CachedFileInfo]
|
|
144
|
+
refs: frozenset[str]
|
|
154
145
|
|
|
155
146
|
last_modified: float
|
|
156
147
|
|
|
@@ -162,7 +153,7 @@ class CachedRevisionInfo:
|
|
|
162
153
|
|
|
163
154
|
Example: "2 weeks ago".
|
|
164
155
|
"""
|
|
165
|
-
return
|
|
156
|
+
return format_timesince(self.last_modified)
|
|
166
157
|
|
|
167
158
|
@property
|
|
168
159
|
def size_on_disk_str(self) -> str:
|
|
@@ -196,28 +187,22 @@ class CachedRepoInfo:
|
|
|
196
187
|
Sum of the blob file sizes in the cached repo.
|
|
197
188
|
nb_files (`int`):
|
|
198
189
|
Total number of blob files in the cached repo.
|
|
199
|
-
revisions (`
|
|
190
|
+
revisions (`frozenset[CachedRevisionInfo]`):
|
|
200
191
|
Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
|
|
201
192
|
last_accessed (`float`):
|
|
202
193
|
Timestamp of the last time a blob file of the repo has been accessed.
|
|
203
194
|
last_modified (`float`):
|
|
204
195
|
Timestamp of the last time a blob file of the repo has been modified/created.
|
|
205
196
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
size of folders and symlinks.
|
|
211
|
-
|
|
212
|
-
</Tip>
|
|
197
|
+
> [!WARNING]
|
|
198
|
+
> `size_on_disk` is not necessarily the sum of all revisions sizes because of
|
|
199
|
+
> duplicated files. Besides, only blobs are taken into account, not the (negligible)
|
|
200
|
+
> size of folders and symlinks.
|
|
213
201
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
for more details.
|
|
219
|
-
|
|
220
|
-
</Tip>
|
|
202
|
+
> [!WARNING]
|
|
203
|
+
> `last_accessed` and `last_modified` reliability can depend on the OS you are using.
|
|
204
|
+
> See [python documentation](https://docs.python.org/3/library/os.html#os.stat_result)
|
|
205
|
+
> for more details.
|
|
221
206
|
"""
|
|
222
207
|
|
|
223
208
|
repo_id: str
|
|
@@ -225,7 +210,7 @@ class CachedRepoInfo:
|
|
|
225
210
|
repo_path: Path
|
|
226
211
|
size_on_disk: int
|
|
227
212
|
nb_files: int
|
|
228
|
-
revisions:
|
|
213
|
+
revisions: frozenset[CachedRevisionInfo]
|
|
229
214
|
|
|
230
215
|
last_accessed: float
|
|
231
216
|
last_modified: float
|
|
@@ -238,7 +223,7 @@ class CachedRepoInfo:
|
|
|
238
223
|
|
|
239
224
|
Example: "2 weeks ago".
|
|
240
225
|
"""
|
|
241
|
-
return
|
|
226
|
+
return format_timesince(self.last_accessed)
|
|
242
227
|
|
|
243
228
|
@property
|
|
244
229
|
def last_modified_str(self) -> str:
|
|
@@ -248,7 +233,7 @@ class CachedRepoInfo:
|
|
|
248
233
|
|
|
249
234
|
Example: "2 weeks ago".
|
|
250
235
|
"""
|
|
251
|
-
return
|
|
236
|
+
return format_timesince(self.last_modified)
|
|
252
237
|
|
|
253
238
|
@property
|
|
254
239
|
def size_on_disk_str(self) -> str:
|
|
@@ -260,7 +245,12 @@ class CachedRepoInfo:
|
|
|
260
245
|
return _format_size(self.size_on_disk)
|
|
261
246
|
|
|
262
247
|
@property
|
|
263
|
-
def
|
|
248
|
+
def cache_id(self) -> str:
|
|
249
|
+
"""Canonical `type/id` identifier used across cache tooling."""
|
|
250
|
+
return f"{self.repo_type}/{self.repo_id}"
|
|
251
|
+
|
|
252
|
+
@property
|
|
253
|
+
def refs(self) -> dict[str, CachedRevisionInfo]:
|
|
264
254
|
"""
|
|
265
255
|
(property) Mapping between `refs` and revision data structures.
|
|
266
256
|
"""
|
|
@@ -277,21 +267,21 @@ class DeleteCacheStrategy:
|
|
|
277
267
|
Args:
|
|
278
268
|
expected_freed_size (`float`):
|
|
279
269
|
Expected freed size once strategy is executed.
|
|
280
|
-
blobs (`
|
|
270
|
+
blobs (`frozenset[Path]`):
|
|
281
271
|
Set of blob file paths to be deleted.
|
|
282
|
-
refs (`
|
|
272
|
+
refs (`frozenset[Path]`):
|
|
283
273
|
Set of reference file paths to be deleted.
|
|
284
|
-
repos (`
|
|
274
|
+
repos (`frozenset[Path]`):
|
|
285
275
|
Set of entire repo paths to be deleted.
|
|
286
|
-
snapshots (`
|
|
276
|
+
snapshots (`frozenset[Path]`):
|
|
287
277
|
Set of snapshots to be deleted (directory of symlinks).
|
|
288
278
|
"""
|
|
289
279
|
|
|
290
280
|
expected_freed_size: int
|
|
291
|
-
blobs:
|
|
292
|
-
refs:
|
|
293
|
-
repos:
|
|
294
|
-
snapshots:
|
|
281
|
+
blobs: frozenset[Path]
|
|
282
|
+
refs: frozenset[Path]
|
|
283
|
+
repos: frozenset[Path]
|
|
284
|
+
snapshots: frozenset[Path]
|
|
295
285
|
|
|
296
286
|
@property
|
|
297
287
|
def expected_freed_size_str(self) -> str:
|
|
@@ -305,20 +295,14 @@ class DeleteCacheStrategy:
|
|
|
305
295
|
def execute(self) -> None:
|
|
306
296
|
"""Execute the defined strategy.
|
|
307
297
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
files.
|
|
313
|
-
|
|
314
|
-
</Tip>
|
|
315
|
-
|
|
316
|
-
<Tip warning={true}>
|
|
298
|
+
> [!WARNING]
|
|
299
|
+
> If this method is interrupted, the cache might get corrupted. Deletion order is
|
|
300
|
+
> implemented so that references and symlinks are deleted before the actual blob
|
|
301
|
+
> files.
|
|
317
302
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
</Tip>
|
|
303
|
+
> [!WARNING]
|
|
304
|
+
> This method is irreversible. If executed, cached files are erased and must be
|
|
305
|
+
> downloaded again.
|
|
322
306
|
"""
|
|
323
307
|
# Deletion order matters. Blobs are deleted in last so that the user can't end
|
|
324
308
|
# up in a state where a `ref`` refers to a missing snapshot or a snapshot
|
|
@@ -352,25 +336,22 @@ class HFCacheInfo:
|
|
|
352
336
|
Args:
|
|
353
337
|
size_on_disk (`int`):
|
|
354
338
|
Sum of all valid repo sizes in the cache-system.
|
|
355
|
-
repos (`
|
|
339
|
+
repos (`frozenset[CachedRepoInfo]`):
|
|
356
340
|
Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
|
|
357
341
|
cache-system while scanning.
|
|
358
|
-
warnings (`
|
|
342
|
+
warnings (`list[CorruptedCacheException]`):
|
|
359
343
|
List of [`~CorruptedCacheException`] that occurred while scanning the cache.
|
|
360
344
|
Those exceptions are captured so that the scan can continue. Corrupted repos
|
|
361
345
|
are skipped from the scan.
|
|
362
346
|
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
some cached repos are corrupted, their sizes are not taken into account.
|
|
367
|
-
|
|
368
|
-
</Tip>
|
|
347
|
+
> [!WARNING]
|
|
348
|
+
> Here `size_on_disk` is equal to the sum of all repo sizes (only blobs). However if
|
|
349
|
+
> some cached repos are corrupted, their sizes are not taken into account.
|
|
369
350
|
"""
|
|
370
351
|
|
|
371
352
|
size_on_disk: int
|
|
372
|
-
repos:
|
|
373
|
-
warnings:
|
|
353
|
+
repos: frozenset[CachedRepoInfo]
|
|
354
|
+
warnings: list[CorruptedCacheException]
|
|
374
355
|
|
|
375
356
|
@property
|
|
376
357
|
def size_on_disk_str(self) -> str:
|
|
@@ -412,17 +393,14 @@ class HFCacheInfo:
|
|
|
412
393
|
Cache deletion done. Saved 8.6G.
|
|
413
394
|
```
|
|
414
395
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
allows having a dry run before actually executing the deletion.
|
|
420
|
-
|
|
421
|
-
</Tip>
|
|
396
|
+
> [!WARNING]
|
|
397
|
+
> `delete_revisions` returns a [`~utils.DeleteCacheStrategy`] object that needs to
|
|
398
|
+
> be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
|
|
399
|
+
> allows having a dry run before actually executing the deletion.
|
|
422
400
|
"""
|
|
423
|
-
hashes_to_delete:
|
|
401
|
+
hashes_to_delete: set[str] = set(revisions)
|
|
424
402
|
|
|
425
|
-
repos_with_revisions:
|
|
403
|
+
repos_with_revisions: dict[CachedRepoInfo, set[CachedRevisionInfo]] = defaultdict(set)
|
|
426
404
|
|
|
427
405
|
for repo in self.repos:
|
|
428
406
|
for revision in repo.revisions:
|
|
@@ -433,10 +411,10 @@ class HFCacheInfo:
|
|
|
433
411
|
if len(hashes_to_delete) > 0:
|
|
434
412
|
logger.warning(f"Revision(s) not found - cannot delete them: {', '.join(hashes_to_delete)}")
|
|
435
413
|
|
|
436
|
-
delete_strategy_blobs:
|
|
437
|
-
delete_strategy_refs:
|
|
438
|
-
delete_strategy_repos:
|
|
439
|
-
delete_strategy_snapshots:
|
|
414
|
+
delete_strategy_blobs: set[Path] = set()
|
|
415
|
+
delete_strategy_refs: set[Path] = set()
|
|
416
|
+
delete_strategy_repos: set[Path] = set()
|
|
417
|
+
delete_strategy_snapshots: set[Path] = set()
|
|
440
418
|
delete_strategy_expected_freed_size = 0
|
|
441
419
|
|
|
442
420
|
for affected_repo, revisions_to_delete in repos_with_revisions.items():
|
|
@@ -632,17 +610,14 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
|
|
|
632
610
|
)
|
|
633
611
|
```
|
|
634
612
|
|
|
635
|
-
You can also print a detailed report directly from the `
|
|
613
|
+
You can also print a detailed report directly from the `hf` command line using:
|
|
636
614
|
```text
|
|
637
|
-
>
|
|
638
|
-
|
|
639
|
-
---------------------------
|
|
640
|
-
glue
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
bert-base-cased model 1.9G 13 main /Users/lucain/.cache/huggingface/hub/models--bert-base-cased
|
|
644
|
-
t5-base model 10.1K 3 main /Users/lucain/.cache/huggingface/hub/models--t5-base
|
|
645
|
-
t5-small model 970.7M 11 refs/pr/1, main /Users/lucain/.cache/huggingface/hub/models--t5-small
|
|
615
|
+
> hf cache ls
|
|
616
|
+
ID SIZE LAST_ACCESSED LAST_MODIFIED REFS
|
|
617
|
+
--------------------------- -------- ------------- ------------- -----------
|
|
618
|
+
dataset/nyu-mll/glue 157.4M 2 days ago 2 days ago main script
|
|
619
|
+
model/LiquidAI/LFM2-VL-1.6B 3.2G 4 days ago 4 days ago main
|
|
620
|
+
model/microsoft/UserLM-8b 32.1G 4 days ago 4 days ago main
|
|
646
621
|
|
|
647
622
|
Done in 0.0s. Scanned 6 repo(s) for a total of 3.4G.
|
|
648
623
|
Got 1 warning(s) while scanning. Use -vvv to print details.
|
|
@@ -652,17 +627,14 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
|
|
|
652
627
|
cache_dir (`str` or `Path`, `optional`):
|
|
653
628
|
Cache directory to cache. Defaults to the default HF cache directory.
|
|
654
629
|
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
If the cache directory is a file, instead of a directory.
|
|
664
|
-
|
|
665
|
-
</Tip>
|
|
630
|
+
> [!WARNING]
|
|
631
|
+
> Raises:
|
|
632
|
+
>
|
|
633
|
+
> `CacheNotFound`
|
|
634
|
+
> If the cache directory does not exist.
|
|
635
|
+
>
|
|
636
|
+
> [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
|
637
|
+
> If the cache directory is a file, instead of a directory.
|
|
666
638
|
|
|
667
639
|
Returns: a [`~HFCacheInfo`] object.
|
|
668
640
|
"""
|
|
@@ -681,8 +653,8 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
|
|
|
681
653
|
f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
|
|
682
654
|
)
|
|
683
655
|
|
|
684
|
-
repos:
|
|
685
|
-
warnings:
|
|
656
|
+
repos: set[CachedRepoInfo] = set()
|
|
657
|
+
warnings: list[CorruptedCacheException] = []
|
|
686
658
|
for repo_path in cache_dir.iterdir():
|
|
687
659
|
if repo_path.name == ".locks": # skip './.locks/' folder
|
|
688
660
|
continue
|
|
@@ -718,7 +690,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
|
|
|
718
690
|
f"Repo type must be `dataset`, `model` or `space`, found `{repo_type}` ({repo_path})."
|
|
719
691
|
)
|
|
720
692
|
|
|
721
|
-
blob_stats:
|
|
693
|
+
blob_stats: dict[Path, os.stat_result] = {} # Key is blob_path, value is blob stats
|
|
722
694
|
|
|
723
695
|
snapshots_path = repo_path / "snapshots"
|
|
724
696
|
refs_path = repo_path / "refs"
|
|
@@ -729,7 +701,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
|
|
|
729
701
|
# Scan over `refs` directory
|
|
730
702
|
|
|
731
703
|
# key is revision hash, value is set of refs
|
|
732
|
-
refs_by_hash:
|
|
704
|
+
refs_by_hash: dict[str, set[str]] = defaultdict(set)
|
|
733
705
|
if refs_path.exists():
|
|
734
706
|
# Example of `refs` directory
|
|
735
707
|
# ── refs
|
|
@@ -752,7 +724,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
|
|
|
752
724
|
refs_by_hash[commit_hash].add(ref_name)
|
|
753
725
|
|
|
754
726
|
# Scan snapshots directory
|
|
755
|
-
cached_revisions:
|
|
727
|
+
cached_revisions: set[CachedRevisionInfo] = set()
|
|
756
728
|
for revision_path in snapshots_path.iterdir():
|
|
757
729
|
# Ignore OS-created helper files
|
|
758
730
|
if revision_path.name in FILES_TO_IGNORE:
|
|
@@ -846,37 +818,10 @@ def _format_size(num: int) -> str:
|
|
|
846
818
|
return f"{num_f:.1f}Y"
|
|
847
819
|
|
|
848
820
|
|
|
849
|
-
_TIMESINCE_CHUNKS = (
|
|
850
|
-
# Label, divider, max value
|
|
851
|
-
("second", 1, 60),
|
|
852
|
-
("minute", 60, 60),
|
|
853
|
-
("hour", 60 * 60, 24),
|
|
854
|
-
("day", 60 * 60 * 24, 6),
|
|
855
|
-
("week", 60 * 60 * 24 * 7, 6),
|
|
856
|
-
("month", 60 * 60 * 24 * 30, 11),
|
|
857
|
-
("year", 60 * 60 * 24 * 365, None),
|
|
858
|
-
)
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
def _format_timesince(ts: float) -> str:
|
|
862
|
-
"""Format timestamp in seconds into a human-readable string, relative to now.
|
|
863
|
-
|
|
864
|
-
Vaguely inspired by Django's `timesince` formatter.
|
|
865
|
-
"""
|
|
866
|
-
delta = time.time() - ts
|
|
867
|
-
if delta < 20:
|
|
868
|
-
return "a few seconds ago"
|
|
869
|
-
for label, divider, max_value in _TIMESINCE_CHUNKS: # noqa: B007
|
|
870
|
-
value = round(delta / divider)
|
|
871
|
-
if max_value is not None and value <= max_value:
|
|
872
|
-
break
|
|
873
|
-
return f"{value} {label}{'s' if value > 1 else ''} ago"
|
|
874
|
-
|
|
875
|
-
|
|
876
821
|
def _try_delete_path(path: Path, path_type: str) -> None:
|
|
877
822
|
"""Try to delete a local file or folder.
|
|
878
823
|
|
|
879
|
-
If the path does not
|
|
824
|
+
If the path does not exist, error is logged as a warning and then ignored.
|
|
880
825
|
|
|
881
826
|
Args:
|
|
882
827
|
path (`Path`)
|
|
@@ -49,9 +49,8 @@ def chunk_iterable(iterable: Iterable[T], chunk_size: int) -> Iterable[Iterable[
|
|
|
49
49
|
[`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
|
50
50
|
If `chunk_size` <= 0.
|
|
51
51
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
</Tip>
|
|
52
|
+
> [!WARNING]
|
|
53
|
+
> The last chunk can be smaller than `chunk_size`.
|
|
55
54
|
"""
|
|
56
55
|
if not isinstance(chunk_size, int) or chunk_size <= 0:
|
|
57
56
|
raise ValueError("`chunk_size` must be a strictly positive integer (>0).")
|
|
@@ -62,7 +62,7 @@ def _deprecate_arguments(
|
|
|
62
62
|
Args:
|
|
63
63
|
version (`str`):
|
|
64
64
|
The version when deprecated arguments will result in error.
|
|
65
|
-
deprecated_args (`
|
|
65
|
+
deprecated_args (`list[str]`):
|
|
66
66
|
List of the arguments to be deprecated.
|
|
67
67
|
custom_message (`str`, *optional*):
|
|
68
68
|
Warning message that is raised. If not passed, a default warning message
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# AI-generated module (ChatGPT)
|
|
2
|
+
import re
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load_dotenv(dotenv_str: str, environ: Optional[dict[str, str]] = None) -> dict[str, str]:
|
|
7
|
+
"""
|
|
8
|
+
Parse a DOTENV-format string and return a dictionary of key-value pairs.
|
|
9
|
+
Handles quoted values, comments, export keyword, and blank lines.
|
|
10
|
+
"""
|
|
11
|
+
env: dict[str, str] = {}
|
|
12
|
+
line_pattern = re.compile(
|
|
13
|
+
r"""
|
|
14
|
+
^\s*
|
|
15
|
+
(?:export[^\S\n]+)? # optional export
|
|
16
|
+
([A-Za-z_][A-Za-z0-9_]*) # key
|
|
17
|
+
[^\S\n]*(=)?[^\S\n]*
|
|
18
|
+
( # value group
|
|
19
|
+
(?:
|
|
20
|
+
'(?:\\'|[^'])*' # single-quoted value
|
|
21
|
+
| \"(?:\\\"|[^\"])*\" # double-quoted value
|
|
22
|
+
| [^#\n\r]+? # unquoted value
|
|
23
|
+
)
|
|
24
|
+
)?
|
|
25
|
+
[^\S\n]*(?:\#.*)?$ # optional inline comment
|
|
26
|
+
""",
|
|
27
|
+
re.VERBOSE,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
for line in dotenv_str.splitlines():
|
|
31
|
+
line = line.strip()
|
|
32
|
+
if not line or line.startswith("#"):
|
|
33
|
+
continue # Skip comments and empty lines
|
|
34
|
+
|
|
35
|
+
match = line_pattern.match(line)
|
|
36
|
+
if match:
|
|
37
|
+
key = match.group(1)
|
|
38
|
+
val = None
|
|
39
|
+
if match.group(2): # if there is '='
|
|
40
|
+
raw_val = match.group(3) or ""
|
|
41
|
+
val = raw_val.strip()
|
|
42
|
+
# Remove surrounding quotes if quoted
|
|
43
|
+
if (val.startswith('"') and val.endswith('"')) or (val.startswith("'") and val.endswith("'")):
|
|
44
|
+
val = val[1:-1]
|
|
45
|
+
val = val.replace(r"\n", "\n").replace(r"\t", "\t").replace(r"\"", '"').replace(r"\\", "\\")
|
|
46
|
+
if raw_val.startswith('"'):
|
|
47
|
+
val = val.replace(r"\$", "$") # only in double quotes
|
|
48
|
+
elif environ is not None:
|
|
49
|
+
# Get it from the current environment
|
|
50
|
+
val = environ.get(key)
|
|
51
|
+
|
|
52
|
+
if val is not None:
|
|
53
|
+
env[key] = val
|
|
54
|
+
|
|
55
|
+
return env
|
|
@@ -24,8 +24,10 @@ from .. import constants
|
|
|
24
24
|
def experimental(fn: Callable) -> Callable:
|
|
25
25
|
"""Decorator to flag a feature as experimental.
|
|
26
26
|
|
|
27
|
-
An experimental feature
|
|
28
|
-
|
|
27
|
+
An experimental feature triggers a warning when used as it might be subject to breaking changes without prior notice
|
|
28
|
+
in the future.
|
|
29
|
+
|
|
30
|
+
Warnings can be disabled by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.
|
|
29
31
|
|
|
30
32
|
Args:
|
|
31
33
|
fn (`Callable`):
|
|
@@ -44,8 +46,8 @@ def experimental(fn: Callable) -> Callable:
|
|
|
44
46
|
... print("Hello world!")
|
|
45
47
|
|
|
46
48
|
>>> my_function()
|
|
47
|
-
UserWarning: 'my_function' is experimental and might be subject to breaking changes in the future
|
|
48
|
-
this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.
|
|
49
|
+
UserWarning: 'my_function' is experimental and might be subject to breaking changes in the future without prior
|
|
50
|
+
notice. You can disable this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment variable.
|
|
49
51
|
Hello world!
|
|
50
52
|
```
|
|
51
53
|
"""
|
|
@@ -56,7 +58,7 @@ def experimental(fn: Callable) -> Callable:
|
|
|
56
58
|
def _inner_fn(*args, **kwargs):
|
|
57
59
|
if not constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING:
|
|
58
60
|
warnings.warn(
|
|
59
|
-
f"'{name}' is experimental and might be subject to breaking changes in the future."
|
|
61
|
+
f"'{name}' is experimental and might be subject to breaking changes in the future without prior notice."
|
|
60
62
|
" You can disable this warning by setting `HF_HUB_DISABLE_EXPERIMENTAL_WARNING=1` as environment"
|
|
61
63
|
" variable.",
|
|
62
64
|
UserWarning,
|
huggingface_hub/utils/_fixes.py
CHANGED
|
@@ -1,13 +1,3 @@
|
|
|
1
|
-
# JSONDecodeError was introduced in requests=2.27 released in 2022.
|
|
2
|
-
# This allows us to support older requests for users
|
|
3
|
-
# More information: https://github.com/psf/requests/pull/5856
|
|
4
|
-
try:
|
|
5
|
-
from requests import JSONDecodeError # type: ignore # noqa: F401
|
|
6
|
-
except ImportError:
|
|
7
|
-
try:
|
|
8
|
-
from simplejson import JSONDecodeError # type: ignore # noqa: F401
|
|
9
|
-
except ImportError:
|
|
10
|
-
from json import JSONDecodeError # type: ignore # noqa: F401
|
|
11
1
|
import contextlib
|
|
12
2
|
import os
|
|
13
3
|
import shutil
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
import re
|
|
18
18
|
import subprocess
|
|
19
|
-
from typing import
|
|
19
|
+
from typing import Optional
|
|
20
20
|
|
|
21
21
|
from ..constants import ENDPOINT
|
|
22
22
|
from ._subprocess import run_interactive_subprocess, run_subprocess
|
|
@@ -27,14 +27,14 @@ GIT_CREDENTIAL_REGEX = re.compile(
|
|
|
27
27
|
^\s* # start of line
|
|
28
28
|
credential\.helper # credential.helper value
|
|
29
29
|
\s*=\s* # separator
|
|
30
|
-
(\w+) # the helper name (group 1)
|
|
30
|
+
([\w\-\/]+) # the helper name or absolute path (group 1)
|
|
31
31
|
(\s|$) # whitespace or end of line
|
|
32
32
|
""",
|
|
33
33
|
flags=re.MULTILINE | re.IGNORECASE | re.VERBOSE,
|
|
34
34
|
)
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
def list_credential_helpers(folder: Optional[str] = None) ->
|
|
37
|
+
def list_credential_helpers(folder: Optional[str] = None) -> list[str]:
|
|
38
38
|
"""Return the list of git credential helpers configured.
|
|
39
39
|
|
|
40
40
|
See https://git-scm.com/docs/gitcredentials.
|
|
@@ -104,14 +104,14 @@ def unset_git_credential(username: str = "hf_user", folder: Optional[str] = None
|
|
|
104
104
|
stdin.flush()
|
|
105
105
|
|
|
106
106
|
|
|
107
|
-
def _parse_credential_output(output: str) ->
|
|
107
|
+
def _parse_credential_output(output: str) -> list[str]:
|
|
108
108
|
"""Parse the output of `git credential fill` to extract the password.
|
|
109
109
|
|
|
110
110
|
Args:
|
|
111
111
|
output (`str`):
|
|
112
112
|
The output of `git credential fill`.
|
|
113
113
|
"""
|
|
114
|
-
# NOTE: If user has set
|
|
114
|
+
# NOTE: If user has set a helper for a custom URL, it will not be caught here.
|
|
115
115
|
# Example: `credential.https://huggingface.co.helper=store`
|
|
116
116
|
# See: https://github.com/huggingface/huggingface_hub/pull/1138#discussion_r1013324508
|
|
117
117
|
return sorted( # Sort for nice printing
|