huggingface-hub 0.24.7__py3-none-any.whl → 0.25.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of huggingface-hub as possibly problematic.
- huggingface_hub/__init__.py +21 -1
- huggingface_hub/_commit_api.py +4 -4
- huggingface_hub/_inference_endpoints.py +13 -1
- huggingface_hub/_local_folder.py +191 -4
- huggingface_hub/_login.py +6 -6
- huggingface_hub/_snapshot_download.py +8 -17
- huggingface_hub/_space_api.py +5 -0
- huggingface_hub/_tensorboard_logger.py +29 -13
- huggingface_hub/_upload_large_folder.py +573 -0
- huggingface_hub/_webhooks_server.py +1 -1
- huggingface_hub/commands/_cli_utils.py +5 -0
- huggingface_hub/commands/download.py +8 -0
- huggingface_hub/commands/huggingface_cli.py +6 -1
- huggingface_hub/commands/lfs.py +2 -1
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +99 -57
- huggingface_hub/commands/tag.py +1 -1
- huggingface_hub/commands/upload.py +2 -1
- huggingface_hub/commands/upload_large_folder.py +129 -0
- huggingface_hub/commands/version.py +37 -0
- huggingface_hub/community.py +2 -2
- huggingface_hub/errors.py +218 -1
- huggingface_hub/fastai_utils.py +2 -3
- huggingface_hub/file_download.py +61 -62
- huggingface_hub/hf_api.py +758 -314
- huggingface_hub/hf_file_system.py +15 -23
- huggingface_hub/hub_mixin.py +27 -25
- huggingface_hub/inference/_client.py +78 -127
- huggingface_hub/inference/_generated/_async_client.py +169 -144
- huggingface_hub/inference/_generated/types/base.py +0 -9
- huggingface_hub/inference/_templating.py +2 -3
- huggingface_hub/inference_api.py +2 -2
- huggingface_hub/keras_mixin.py +2 -2
- huggingface_hub/lfs.py +7 -98
- huggingface_hub/repocard.py +6 -5
- huggingface_hub/repository.py +5 -5
- huggingface_hub/serialization/_torch.py +64 -11
- huggingface_hub/utils/__init__.py +13 -14
- huggingface_hub/utils/_cache_manager.py +97 -14
- huggingface_hub/utils/_fixes.py +18 -2
- huggingface_hub/utils/_http.py +228 -2
- huggingface_hub/utils/_lfs.py +110 -0
- huggingface_hub/utils/_runtime.py +7 -1
- huggingface_hub/utils/_token.py +3 -2
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/METADATA +2 -2
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/RECORD +50 -48
- huggingface_hub/inference/_types.py +0 -52
- huggingface_hub/utils/_errors.py +0 -397
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/lfs.py
CHANGED
@@ -16,10 +16,8 @@
 
 import inspect
 import io
-import os
 import re
 import warnings
-from contextlib import AbstractContextManager
 from dataclasses import dataclass
 from math import ceil
 from os.path import getsize
@@ -27,7 +25,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Optional, Tuple, TypedDict
 from urllib.parse import unquote
 
-from huggingface_hub
+from huggingface_hub import constants
 
 from .utils import (
     build_hf_headers,
@@ -39,6 +37,7 @@ from .utils import (
     tqdm,
     validate_hf_hub_args,
 )
+from .utils._lfs import SliceFileObj
 from .utils.sha import sha256, sha_fileobj
 
 
@@ -139,10 +138,10 @@ def post_lfs_batch_info(
         [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
             If the server returned an error.
     """
-    endpoint = endpoint if endpoint is not None else ENDPOINT
+    endpoint = endpoint if endpoint is not None else constants.ENDPOINT
     url_prefix = ""
-    if repo_type in REPO_TYPES_URL_PREFIXES:
-        url_prefix = REPO_TYPES_URL_PREFIXES[repo_type]
+    if repo_type in constants.REPO_TYPES_URL_PREFIXES:
+        url_prefix = constants.REPO_TYPES_URL_PREFIXES[repo_type]
     batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch"
     payload: Dict = {
         "operation": "upload",
@@ -328,9 +327,9 @@ def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size
     sorted_parts_urls = _get_sorted_parts_urls(header=header, upload_info=operation.upload_info, chunk_size=chunk_size)
 
     # 2. Upload parts (either with hf_transfer or in pure Python)
-    use_hf_transfer = HF_HUB_ENABLE_HF_TRANSFER
+    use_hf_transfer = constants.HF_HUB_ENABLE_HF_TRANSFER
     if (
-        HF_HUB_ENABLE_HF_TRANSFER
+        constants.HF_HUB_ENABLE_HF_TRANSFER
         and not isinstance(operation.path_or_fileobj, str)
         and not isinstance(operation.path_or_fileobj, Path)
     ):
@@ -462,93 +461,3 @@ def _upload_parts_hf_transfer(
     if not supports_callback:
         progress.update(total)
     return output
-
-
-class SliceFileObj(AbstractContextManager):
-    """
-    Utility context manager to read a *slice* of a seekable file-like object as a seekable, file-like object.
-
-    This is NOT thread safe
-
-    Inspired by stackoverflow.com/a/29838711/593036
-
-    Credits to @julien-c
-
-    Args:
-        fileobj (`BinaryIO`):
-            A file-like object to slice. MUST implement `tell()` and `seek()` (and `read()` of course).
-            `fileobj` will be reset to its original position when exiting the context manager.
-        seek_from (`int`):
-            The start of the slice (offset from position 0 in bytes).
-        read_limit (`int`):
-            The maximum number of bytes to read from the slice.
-
-    Attributes:
-        previous_position (`int`):
-            The previous position
-
-    Examples:
-
-    Reading 200 bytes with an offset of 128 bytes from a file (ie bytes 128 to 327):
-    ```python
-    >>> with open("path/to/file", "rb") as file:
-    ...     with SliceFileObj(file, seek_from=128, read_limit=200) as fslice:
-    ...         fslice.read(...)
-    ```
-
-    Reading a file in chunks of 512 bytes
-    ```python
-    >>> import os
-    >>> chunk_size = 512
-    >>> file_size = os.getsize("path/to/file")
-    >>> with open("path/to/file", "rb") as file:
-    ...     for chunk_idx in range(ceil(file_size / chunk_size)):
-    ...         with SliceFileObj(file, seek_from=chunk_idx * chunk_size, read_limit=chunk_size) as fslice:
-    ...             chunk = fslice.read(...)
-
-    ```
-    """
-
-    def __init__(self, fileobj: BinaryIO, seek_from: int, read_limit: int):
-        self.fileobj = fileobj
-        self.seek_from = seek_from
-        self.read_limit = read_limit
-
-    def __enter__(self):
-        self._previous_position = self.fileobj.tell()
-        end_of_stream = self.fileobj.seek(0, os.SEEK_END)
-        self._len = min(self.read_limit, end_of_stream - self.seek_from)
-        # ^^ The actual number of bytes that can be read from the slice
-        self.fileobj.seek(self.seek_from, io.SEEK_SET)
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.fileobj.seek(self._previous_position, io.SEEK_SET)
-
-    def read(self, n: int = -1):
-        pos = self.tell()
-        if pos >= self._len:
-            return b""
-        remaining_amount = self._len - pos
-        data = self.fileobj.read(remaining_amount if n < 0 else min(n, remaining_amount))
-        return data
-
-    def tell(self) -> int:
-        return self.fileobj.tell() - self.seek_from
-
-    def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
-        start = self.seek_from
-        end = start + self._len
-        if whence in (os.SEEK_SET, os.SEEK_END):
-            offset = start + offset if whence == os.SEEK_SET else end + offset
-            offset = max(start, min(offset, end))
-            whence = os.SEEK_SET
-        elif whence == os.SEEK_CUR:
-            cur_pos = self.fileobj.tell()
-            offset = max(start - cur_pos, min(offset, end - cur_pos))
-        else:
-            raise ValueError(f"whence value {whence} is not supported")
-        return self.fileobj.seek(offset, whence) - self.seek_from
-
-    def __iter__(self):
-        yield self.read(n=4 * 1024 * 1024)
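The notable change in this file is that `SliceFileObj` moves out of `lfs.py` into `huggingface_hub/utils/_lfs.py` (imported above as `from .utils._lfs import SliceFileObj`), while module-level constants are now read through the `constants` module. A minimal usage sketch of the relocated helper, assuming a local placeholder file `example.bin`:

```python
# Sketch only: read a single 512-byte slice of a local file through SliceFileObj,
# imported from its new location. "example.bin" is a placeholder path.
from huggingface_hub.utils._lfs import SliceFileObj

with open("example.bin", "rb") as f:
    with SliceFileObj(f, seek_from=0, read_limit=512) as fslice:
        chunk = fslice.read()  # at most 512 bytes, starting at offset 0

print(len(chunk))
```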
huggingface_hub/repocard.py
CHANGED
@@ -19,8 +19,9 @@ from huggingface_hub.repocard_data import (
 )
 from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
 
-from .
-from .
+from . import constants
+from .errors import EntryNotFoundError
+from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
 
 
 logger = logging.get_logger(__name__)
@@ -175,7 +176,7 @@ class RepoCard:
         card_path = Path(
             hf_hub_download(
                 repo_id_or_path,
-                REPOCARD_NAME,
+                constants.REPOCARD_NAME,
                 repo_type=repo_type or cls.repo_type,
                 token=token,
             )
@@ -273,11 +274,11 @@ class RepoCard:
         self.validate(repo_type=repo_type)
 
         with SoftTemporaryDirectory() as tmpdir:
-            tmp_path = Path(tmpdir) / REPOCARD_NAME
+            tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
             tmp_path.write_text(str(self))
             url = upload_file(
                 path_or_fileobj=str(tmp_path),
-                path_in_repo=REPOCARD_NAME,
+                path_in_repo=constants.REPOCARD_NAME,
                 repo_id=repo_id,
                 token=token,
                 repo_type=repo_type,
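As in `lfs.py`, the import style switches from binding names like `REPOCARD_NAME` at import time to going through the `constants` module object, so a value patched on `huggingface_hub.constants` at runtime is seen by this code. A minimal sketch of the pattern (the printed value is the library's usual default, `"README.md"`):

```python
# Sketch of the module-object import pattern adopted throughout this release.
from huggingface_hub import constants

# The attribute lookup happens at use time, so monkeypatching
# huggingface_hub.constants.REPOCARD_NAME would be visible here.
print(constants.REPOCARD_NAME)  # "README.md" by default
```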
huggingface_hub/repository.py
CHANGED
@@ -9,7 +9,7 @@ from pathlib import Path
 from typing import Callable, Dict, Iterator, List, Optional, Tuple, TypedDict, Union
 from urllib.parse import urlparse
 
-from huggingface_hub
+from huggingface_hub import constants
 from huggingface_hub.repocard import metadata_load, metadata_save
 
 from .hf_api import HfApi, repo_type_and_id_from_hf_id
@@ -659,8 +659,8 @@
 
         repo_url = hub_url + "/"
 
-        if self._repo_type in REPO_TYPES_URL_PREFIXES:
-            repo_url += REPO_TYPES_URL_PREFIXES[self._repo_type]
+        if self._repo_type in constants.REPO_TYPES_URL_PREFIXES:
+            repo_url += constants.REPO_TYPES_URL_PREFIXES[self._repo_type]
 
         if token is not None:
             # Add token in git url when provided
@@ -1434,13 +1434,13 @@
             os.chdir(current_working_directory)
 
     def repocard_metadata_load(self) -> Optional[Dict]:
-        filepath = os.path.join(self.local_dir, REPOCARD_NAME)
+        filepath = os.path.join(self.local_dir, constants.REPOCARD_NAME)
         if os.path.isfile(filepath):
             return metadata_load(filepath)
         return None
 
     def repocard_metadata_save(self, data: Dict) -> None:
-        return metadata_save(os.path.join(self.local_dir, REPOCARD_NAME), data)
+        return metadata_save(os.path.join(self.local_dir, constants.REPOCARD_NAME), data)
 
     @property
     def commands_failed(self):
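`Repository.clone_from` builds the clone URL with the same prefix table used in `lfs.py`. A short sketch of what the lookup yields, assuming the usual contents of `REPO_TYPES_URL_PREFIXES`:

```python
# Sketch: dataset and space repos carry a URL prefix, model repos do not.
from huggingface_hub import constants

print(constants.REPO_TYPES_URL_PREFIXES.get("dataset"))  # "datasets/"
print(constants.REPO_TYPES_URL_PREFIXES.get("space"))    # "spaces/"
print(constants.REPO_TYPES_URL_PREFIXES.get("model"))    # None -> no prefix for models
```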
huggingface_hub/serialization/_torch.py
CHANGED
@@ -20,7 +20,7 @@ import re
 from collections import defaultdict
 from functools import lru_cache
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
 
 from .. import constants, logging
 from ._base import MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
@@ -336,17 +336,24 @@ def split_torch_state_dict_into_shards(
     )
 
 
-def
+def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+    """Returns a unique id for plain tensor
+    or a (potentially nested) Tuple of unique id for the flattened Tensor
+    if the input is a wrapper tensor subclass Tensor
     """
-    Return unique identifier to a tensor storage.
 
-
-
-
-
+    try:
+        # for torch 2.1 and above we can also handle tensor subclasses
+        from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+
+        if is_traceable_wrapper_subclass(tensor):
+            attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+            return tuple(_get_unique_id(getattr(tensor, attr)) for attr in attrs)
+
+    except ImportError:
+        # for torch version less than 2.1, we can fallback to original implementation
+        pass
 
-    Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/pytorch_utils.py#L278.
-    """
     if tensor.device.type == "xla" and is_torch_tpu_available():
         # NOTE: xla tensors dont have storage
         # use some other unique id to distinguish.
@@ -358,13 +365,38 @@ def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", int, i
     else:
         unique_id = storage_ptr(tensor)
 
-    return
+    return unique_id
+
+
+def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", Union[int, Tuple[Any, ...]], int]:
+    """
+    Return unique identifier to a tensor storage.
+
+    Multiple different tensors can share the same underlying storage. For
+    example, "meta" tensors all share the same storage, and thus their identifier will all be equal. This identifier is
+    guaranteed to be unique and constant for this tensor's storage during its lifetime. Two tensor storages with
+    non-overlapping lifetimes may have the same id.
+
+    Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/pytorch_utils.py#L278.
+    """
+    return tensor.device, _get_unique_id(tensor), get_torch_storage_size(tensor)
 
 
 def get_torch_storage_size(tensor: "torch.Tensor") -> int:
     """
     Taken from https://github.com/huggingface/safetensors/blob/08db34094e9e59e2f9218f2df133b7b4aaff5a99/bindings/python/py_src/safetensors/torch.py#L31C1-L41C59
     """
+    try:
+        # for torch 2.1 and above we can also handle tensor subclasses
+        from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+
+        if is_traceable_wrapper_subclass(tensor):
+            attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+            return sum(get_torch_storage_size(getattr(tensor, attr)) for attr in attrs)
+    except ImportError:
+        # for torch version less than 2.1, we can fallback to original implementation
+        pass
+
     try:
         return tensor.untyped_storage().nbytes()
     except AttributeError:
@@ -398,10 +430,20 @@ def is_torch_tpu_available(check_device=True):
     return False
 
 
-def storage_ptr(tensor: "torch.Tensor") -> int:
+def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L11.
     """
+    try:
+        # for torch 2.1 and above we can also handle tensor subclasses
+        from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+
+        if is_traceable_wrapper_subclass(tensor):
+            return _get_unique_id(tensor)
+    except ImportError:
+        # for torch version less than 2.1, we can fallback to original implementation
+        pass
+
     try:
         return tensor.untyped_storage().data_ptr()
     except Exception:
@@ -496,6 +538,17 @@ def _is_complete(tensor: "torch.Tensor") -> bool:
     """
    Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L80
     """
+    try:
+        # for torch 2.1 and above we can also handle tensor subclasses
+        from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+
+        if is_traceable_wrapper_subclass(tensor):
+            attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+            return all(_is_complete(getattr(tensor, attr)) for attr in attrs)
+    except ImportError:
+        # for torch version less than 2.1, we can fallback to original implementation
+        pass
+
     return tensor.data_ptr() == storage_ptr(tensor) and tensor.nelement() * _get_dtype_size(
         tensor.dtype
     ) == get_torch_storage_size(tensor)
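The new `_get_unique_id` helper lets the sharding code recognize tensors that share storage, including traceable wrapper tensor subclasses on torch >= 2.1. A hedged sketch of what `get_torch_storage_id` distinguishes (import path taken from this file; requires torch to be installed):

```python
# Sketch: two views of the same storage get the same storage id; an unrelated tensor does not.
import torch

from huggingface_hub.serialization._torch import get_torch_storage_id

base = torch.zeros(4, 4)
view = base[:2]            # shares the underlying storage with `base`
other = torch.zeros(4, 4)  # separate storage

assert get_torch_storage_id(base) == get_torch_storage_id(view)
assert get_torch_storage_id(base) != get_torch_storage_id(other)
```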
huggingface_hub/utils/__init__.py
CHANGED
@@ -16,10 +16,21 @@
 # ruff: noqa: F401
 
 from huggingface_hub.errors import (
+    BadRequestError,
+    CacheNotFound,
+    CorruptedCacheException,
+    DisabledRepoError,
+    EntryNotFoundError,
+    FileMetadataError,
+    GatedRepoError,
+    HfHubHTTPError,
     HFValidationError,
+    LocalEntryNotFoundError,
     LocalTokenNotFoundError,
     NotASafetensorsRepoError,
     OfflineModeIsEnabled,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
     SafetensorsParsingError,
 )
 
@@ -29,26 +40,12 @@ from ._cache_manager import (
     CachedFileInfo,
     CachedRepoInfo,
     CachedRevisionInfo,
-    CacheNotFound,
-    CorruptedCacheException,
     DeleteCacheStrategy,
     HFCacheInfo,
     scan_cache_dir,
 )
 from ._chunk_utils import chunk_iterable
 from ._datetime import parse_datetime
-from ._errors import (
-    BadRequestError,
-    DisabledRepoError,
-    EntryNotFoundError,
-    FileMetadataError,
-    GatedRepoError,
-    HfHubHTTPError,
-    LocalEntryNotFoundError,
-    RepositoryNotFoundError,
-    RevisionNotFoundError,
-    hf_raise_for_status,
-)
 from ._experimental import experimental
 from ._fixes import SoftTemporaryDirectory, WeakFileLock, yaml_dump
 from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
@@ -58,6 +55,7 @@ from ._http import (
     configure_http_backend,
     fix_hf_endpoint_in_url,
     get_session,
+    hf_raise_for_status,
     http_backoff,
     reset_sessions,
 )
@@ -84,6 +82,7 @@ from ._runtime import (
     get_tf_version,
     get_torch_version,
     is_aiohttp_available,
+    is_colab_enterprise,
     is_fastai_available,
     is_fastapi_available,
     is_fastcore_available,
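The net effect of these hunks is that the exception classes are defined once in `huggingface_hub.errors` and only re-exported by `huggingface_hub.utils`, while `hf_raise_for_status` now comes from `._http` instead of the removed `._errors` module. A small sketch of the resulting equivalence:

```python
# Sketch: after the consolidation, both import paths resolve to the same class.
from huggingface_hub.errors import EntryNotFoundError as from_errors
from huggingface_hub.utils import EntryNotFoundError as from_utils

assert from_errors is from_utils
```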
huggingface_hub/utils/_cache_manager.py
CHANGED
@@ -22,6 +22,9 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
 
+from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
+
+from ..commands._cli_utils import tabulate
 from ..constants import HF_HUB_CACHE
 from . import logging
 
@@ -34,20 +37,6 @@ REPO_TYPE_T = Literal["model", "dataset", "space"]
 FILES_TO_IGNORE = [".DS_Store"]
 
 
-class CacheNotFound(Exception):
-    """Exception thrown when the Huggingface cache is not found."""
-
-    cache_dir: Union[str, Path]
-
-    def __init__(self, msg: str, cache_dir: Union[str, Path], *args, **kwargs):
-        super().__init__(msg, *args, **kwargs)
-        self.cache_dir = cache_dir
-
-
-class CorruptedCacheException(Exception):
-    """Exception for any unexpected structure in the Huggingface cache-system."""
-
-
 @dataclass(frozen=True)
 class CachedFileInfo:
     """Frozen data structure holding information about a single cached file.
@@ -496,6 +485,100 @@ class HFCacheInfo:
             expected_freed_size=delete_strategy_expected_freed_size,
         )
 
+    def export_as_table(self, *, verbosity: int = 0) -> str:
+        """Generate a table from the [`HFCacheInfo`] object.
+
+        Pass `verbosity=0` to get a table with a single row per repo, with columns
+        "repo_id", "repo_type", "size_on_disk", "nb_files", "last_accessed", "last_modified", "refs", "local_path".
+
+        Pass `verbosity=1` to get a table with a row per repo and revision (thus multiple rows can appear for a single repo), with columns
+        "repo_id", "repo_type", "revision", "size_on_disk", "nb_files", "last_modified", "refs", "local_path".
+
+        Example:
+        ```py
+        >>> from huggingface_hub.utils import scan_cache_dir
+
+        >>> hf_cache_info = scan_cache_dir()
+        HFCacheInfo(...)
+
+        >>> print(hf_cache_info.export_as_table())
+        REPO ID REPO TYPE SIZE ON DISK NB FILES LAST_ACCESSED LAST_MODIFIED REFS LOCAL PATH
+        --------------------------------------------------- --------- ------------ -------- ------------- ------------- ---- --------------------------------------------------------------------------------------------------
+        roberta-base model 2.7M 5 1 day ago 1 week ago main ~/.cache/huggingface/hub/models--roberta-base
+        suno/bark model 8.8K 1 1 week ago 1 week ago main ~/.cache/huggingface/hub/models--suno--bark
+        t5-base model 893.8M 4 4 days ago 7 months ago main ~/.cache/huggingface/hub/models--t5-base
+        t5-large model 3.0G 4 5 weeks ago 5 months ago main ~/.cache/huggingface/hub/models--t5-large
+
+        >>> print(hf_cache_info.export_as_table(verbosity=1))
+        REPO ID REPO TYPE REVISION SIZE ON DISK NB FILES LAST_MODIFIED REFS LOCAL PATH
+        --------------------------------------------------- --------- ---------------------------------------- ------------ -------- ------------- ---- -----------------------------------------------------------------------------------------------------------------------------------------------------
+        roberta-base model e2da8e2f811d1448a5b465c236feacd80ffbac7b 2.7M 5 1 week ago main ~/.cache/huggingface/hub/models--roberta-base/snapshots/e2da8e2f811d1448a5b465c236feacd80ffbac7b
+        suno/bark model 70a8a7d34168586dc5d028fa9666aceade177992 8.8K 1 1 week ago main ~/.cache/huggingface/hub/models--suno--bark/snapshots/70a8a7d34168586dc5d028fa9666aceade177992
+        t5-base model a9723ea7f1b39c1eae772870f3b547bf6ef7e6c1 893.8M 4 7 months ago main ~/.cache/huggingface/hub/models--t5-base/snapshots/a9723ea7f1b39c1eae772870f3b547bf6ef7e6c1
+        t5-large model 150ebc2c4b72291e770f58e6057481c8d2ed331a 3.0G 4 5 months ago main ~/.cache/huggingface/hub/models--t5-large/snapshots/150ebc2c4b72291e770f58e6057481c8d2ed331a
+        ```
+
+        Args:
+            verbosity (`int`, *optional*):
+                The verbosity level. Defaults to 0.
+
+        Returns:
+            `str`: The table as a string.
+        """
+        if verbosity == 0:
+            return tabulate(
+                rows=[
+                    [
+                        repo.repo_id,
+                        repo.repo_type,
+                        "{:>12}".format(repo.size_on_disk_str),
+                        repo.nb_files,
+                        repo.last_accessed_str,
+                        repo.last_modified_str,
+                        ", ".join(sorted(repo.refs)),
+                        str(repo.repo_path),
+                    ]
+                    for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
+                ],
+                headers=[
+                    "REPO ID",
+                    "REPO TYPE",
+                    "SIZE ON DISK",
+                    "NB FILES",
+                    "LAST_ACCESSED",
+                    "LAST_MODIFIED",
+                    "REFS",
+                    "LOCAL PATH",
+                ],
+            )
+        else:
+            return tabulate(
+                rows=[
+                    [
+                        repo.repo_id,
+                        repo.repo_type,
+                        revision.commit_hash,
+                        "{:>12}".format(revision.size_on_disk_str),
+                        revision.nb_files,
+                        revision.last_modified_str,
+                        ", ".join(sorted(revision.refs)),
+                        str(revision.snapshot_path),
+                    ]
+                    for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
+                    for revision in sorted(repo.revisions, key=lambda revision: revision.commit_hash)
+                ],
+                headers=[
+                    "REPO ID",
+                    "REPO TYPE",
+                    "REVISION",
+                    "SIZE ON DISK",
+                    "NB FILES",
+                    "LAST_MODIFIED",
+                    "REFS",
+                    "LOCAL PATH",
+                ],
+            )
+
 
 def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
     """Scan the entire HF cache-system and return a [`~HFCacheInfo`] structure.
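Two things change here: `CacheNotFound` and `CorruptedCacheException` now live in `huggingface_hub.errors`, and `HFCacheInfo` gains `export_as_table()`, presumably backing the reworked `huggingface-cli scan-cache` listing (see `commands/scan_cache.py` above). A short sketch combining both; the scanned path is whatever the default `HF_HUB_CACHE` points to:

```python
# Sketch: print the per-revision cache table, handling a missing cache directory.
from huggingface_hub.errors import CacheNotFound
from huggingface_hub.utils import scan_cache_dir

try:
    print(scan_cache_dir().export_as_table(verbosity=1))
except CacheNotFound as e:
    print(f"No cache found at {e.cache_dir}")
```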
huggingface_hub/utils/_fixes.py
CHANGED
@@ -18,7 +18,7 @@ from pathlib import Path
 from typing import Callable, Generator, Optional, Union
 
 import yaml
-from filelock import BaseFileLock, FileLock, Timeout
+from filelock import BaseFileLock, FileLock, SoftFileLock, Timeout
 
 from .. import constants
 from . import logging
@@ -84,13 +84,29 @@ def _set_write_permission_and_retry(func, path, excinfo):
 
 @contextlib.contextmanager
 def WeakFileLock(lock_file: Union[str, Path]) -> Generator[BaseFileLock, None, None]:
-    """A filelock
+    """A filelock with some custom logic.
+
+    This filelock is weaker than the default filelock in that:
+    1. It won't raise an exception if release fails.
+    2. It will default to a SoftFileLock if the filesystem does not support flock.
+
+    An INFO log message is emitted every 10 seconds if the lock is not acquired immediately.
+    """
     lock = FileLock(lock_file, timeout=constants.FILELOCK_LOG_EVERY_SECONDS)
     while True:
         try:
             lock.acquire()
         except Timeout:
             logger.info("still waiting to acquire lock on %s", lock_file)
+        except NotImplementedError as e:
+            if "use SoftFileLock instead" in str(e):
+                # It's possible that the system does support flock, expect for one partition or filesystem.
+                # In this case, let's default to a SoftFileLock.
+                logger.warning(
+                    "FileSystem does not appear to support flock. Falling back to SoftFileLock for %s", lock_file
+                )
+                lock = SoftFileLock(lock_file, timeout=constants.FILELOCK_LOG_EVERY_SECONDS)
+                continue
         else:
             break
 
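`WeakFileLock` keeps its context-manager interface; the new branch only swaps in `filelock.SoftFileLock` when the filesystem rejects `flock`-based locking with `NotImplementedError`. A minimal usage sketch (`example.lock` is a placeholder path):

```python
# Sketch: callers keep using WeakFileLock unchanged; the SoftFileLock fallback is internal,
# and per the docstring above a failed release does not raise.
from huggingface_hub.utils import WeakFileLock

with WeakFileLock("example.lock"):
    pass  # critical section: the lock is held here
```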