huggingface-hub 0.24.6__py3-none-any.whl → 0.25.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +21 -1
- huggingface_hub/_commit_api.py +4 -4
- huggingface_hub/_inference_endpoints.py +13 -1
- huggingface_hub/_local_folder.py +191 -4
- huggingface_hub/_login.py +6 -6
- huggingface_hub/_snapshot_download.py +8 -17
- huggingface_hub/_space_api.py +5 -0
- huggingface_hub/_tensorboard_logger.py +29 -13
- huggingface_hub/_upload_large_folder.py +573 -0
- huggingface_hub/_webhooks_server.py +1 -1
- huggingface_hub/commands/_cli_utils.py +5 -0
- huggingface_hub/commands/download.py +8 -0
- huggingface_hub/commands/huggingface_cli.py +6 -1
- huggingface_hub/commands/lfs.py +2 -1
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +99 -57
- huggingface_hub/commands/tag.py +1 -1
- huggingface_hub/commands/upload.py +2 -1
- huggingface_hub/commands/upload_large_folder.py +129 -0
- huggingface_hub/commands/version.py +37 -0
- huggingface_hub/community.py +2 -2
- huggingface_hub/errors.py +218 -1
- huggingface_hub/fastai_utils.py +2 -3
- huggingface_hub/file_download.py +63 -63
- huggingface_hub/hf_api.py +758 -314
- huggingface_hub/hf_file_system.py +15 -23
- huggingface_hub/hub_mixin.py +27 -25
- huggingface_hub/inference/_client.py +78 -127
- huggingface_hub/inference/_generated/_async_client.py +169 -144
- huggingface_hub/inference/_generated/types/base.py +0 -9
- huggingface_hub/inference/_templating.py +2 -3
- huggingface_hub/inference_api.py +2 -2
- huggingface_hub/keras_mixin.py +2 -2
- huggingface_hub/lfs.py +7 -98
- huggingface_hub/repocard.py +6 -5
- huggingface_hub/repository.py +5 -5
- huggingface_hub/serialization/_torch.py +64 -11
- huggingface_hub/utils/__init__.py +13 -14
- huggingface_hub/utils/_cache_manager.py +97 -14
- huggingface_hub/utils/_fixes.py +18 -2
- huggingface_hub/utils/_http.py +228 -2
- huggingface_hub/utils/_lfs.py +110 -0
- huggingface_hub/utils/_runtime.py +7 -1
- huggingface_hub/utils/_token.py +3 -2
- {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/METADATA +2 -2
- {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/RECORD +50 -48
- huggingface_hub/inference/_types.py +0 -52
- huggingface_hub/utils/_errors.py +0 -397
- {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/file_download.py
CHANGED
|
@@ -18,41 +18,29 @@ from urllib.parse import quote, urlparse
|
|
|
18
18
|
|
|
19
19
|
import requests
|
|
20
20
|
|
|
21
|
-
from . import
|
|
21
|
+
from . import (
|
|
22
|
+
__version__, # noqa: F401 # for backward compatibility
|
|
23
|
+
constants,
|
|
24
|
+
)
|
|
22
25
|
from ._local_folder import (
|
|
23
26
|
get_local_download_paths,
|
|
24
27
|
read_download_metadata,
|
|
25
28
|
write_download_metadata,
|
|
26
29
|
)
|
|
27
30
|
from .constants import (
|
|
28
|
-
|
|
29
|
-
DEFAULT_REQUEST_TIMEOUT,
|
|
30
|
-
DEFAULT_REVISION,
|
|
31
|
-
DOWNLOAD_CHUNK_SIZE,
|
|
32
|
-
ENDPOINT,
|
|
33
|
-
HF_HUB_CACHE,
|
|
34
|
-
HF_HUB_DISABLE_SYMLINKS_WARNING,
|
|
35
|
-
HF_HUB_DOWNLOAD_TIMEOUT,
|
|
36
|
-
HF_HUB_ENABLE_HF_TRANSFER,
|
|
37
|
-
HF_HUB_ETAG_TIMEOUT,
|
|
38
|
-
HF_TRANSFER_CONCURRENCY,
|
|
39
|
-
HUGGINGFACE_CO_URL_TEMPLATE,
|
|
40
|
-
HUGGINGFACE_HEADER_X_LINKED_ETAG,
|
|
41
|
-
HUGGINGFACE_HEADER_X_LINKED_SIZE,
|
|
42
|
-
HUGGINGFACE_HEADER_X_REPO_COMMIT,
|
|
31
|
+
HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 # for backward compatibility
|
|
43
32
|
HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility
|
|
44
|
-
REPO_ID_SEPARATOR,
|
|
45
|
-
REPO_TYPES,
|
|
46
|
-
REPO_TYPES_URL_PREFIXES,
|
|
47
33
|
)
|
|
48
|
-
from .
|
|
34
|
+
from .errors import (
|
|
49
35
|
EntryNotFoundError,
|
|
50
36
|
FileMetadataError,
|
|
51
37
|
GatedRepoError,
|
|
52
38
|
LocalEntryNotFoundError,
|
|
53
|
-
OfflineModeIsEnabled,
|
|
54
39
|
RepositoryNotFoundError,
|
|
55
40
|
RevisionNotFoundError,
|
|
41
|
+
)
|
|
42
|
+
from .utils import (
|
|
43
|
+
OfflineModeIsEnabled,
|
|
56
44
|
SoftTemporaryDirectory,
|
|
57
45
|
WeakFileLock,
|
|
58
46
|
build_hf_headers,
|
|
@@ -116,7 +104,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
|
|
|
116
104
|
"""
|
|
117
105
|
# Defaults to HF cache
|
|
118
106
|
if cache_dir is None:
|
|
119
|
-
cache_dir = HF_HUB_CACHE
|
|
107
|
+
cache_dir = constants.HF_HUB_CACHE
|
|
120
108
|
cache_dir = str(Path(cache_dir).expanduser().resolve()) # make it unique
|
|
121
109
|
|
|
122
110
|
# Check symlink compatibility only once (per cache directory) at first time use
|
|
@@ -137,7 +125,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
|
|
|
137
125
|
# Likely running on Windows
|
|
138
126
|
_are_symlinks_supported_in_dir[cache_dir] = False
|
|
139
127
|
|
|
140
|
-
if not HF_HUB_DISABLE_SYMLINKS_WARNING:
|
|
128
|
+
if not constants.HF_HUB_DISABLE_SYMLINKS_WARNING:
|
|
141
129
|
message = (
|
|
142
130
|
"`huggingface_hub` cache-system uses symlinks by default to"
|
|
143
131
|
" efficiently store duplicated files but your machine does not"
|
|
@@ -152,7 +140,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
|
|
|
152
140
|
message += (
|
|
153
141
|
"\nTo support symlinks on Windows, you either need to"
|
|
154
142
|
" activate Developer Mode or to run Python as an"
|
|
155
|
-
" administrator. In order to
|
|
143
|
+
" administrator. In order to activate developer mode,"
|
|
156
144
|
" see this article:"
|
|
157
145
|
" https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development"
|
|
158
146
|
)
|
|
@@ -257,20 +245,20 @@ def hf_hub_url(
|
|
|
257
245
|
if subfolder is not None:
|
|
258
246
|
filename = f"{subfolder}/{filename}"
|
|
259
247
|
|
|
260
|
-
if repo_type not in REPO_TYPES:
|
|
248
|
+
if repo_type not in constants.REPO_TYPES:
|
|
261
249
|
raise ValueError("Invalid repo type")
|
|
262
250
|
|
|
263
|
-
if repo_type in REPO_TYPES_URL_PREFIXES:
|
|
264
|
-
repo_id = REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
|
|
251
|
+
if repo_type in constants.REPO_TYPES_URL_PREFIXES:
|
|
252
|
+
repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
|
|
265
253
|
|
|
266
254
|
if revision is None:
|
|
267
|
-
revision = DEFAULT_REVISION
|
|
255
|
+
revision = constants.DEFAULT_REVISION
|
|
268
256
|
url = HUGGINGFACE_CO_URL_TEMPLATE.format(
|
|
269
257
|
repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename)
|
|
270
258
|
)
|
|
271
259
|
# Update endpoint if provided
|
|
272
|
-
if endpoint is not None and url.startswith(ENDPOINT):
|
|
273
|
-
url = endpoint + url[len(ENDPOINT) :]
|
|
260
|
+
if endpoint is not None and url.startswith(constants.ENDPOINT):
|
|
261
|
+
url = endpoint + url[len(constants.ENDPOINT) :]
|
|
274
262
|
return url
|
|
275
263
|
|
|
276
264
|
|
|
@@ -333,7 +321,7 @@ def filename_to_url(
|
|
|
333
321
|
)
|
|
334
322
|
|
|
335
323
|
if cache_dir is None:
|
|
336
|
-
cache_dir = HF_HUB_CACHE
|
|
324
|
+
cache_dir = constants.HF_HUB_CACHE
|
|
337
325
|
if isinstance(cache_dir, Path):
|
|
338
326
|
cache_dir = str(cache_dir)
|
|
339
327
|
|
|
@@ -439,8 +427,12 @@ def http_get(
|
|
|
439
427
|
The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If
|
|
440
428
|
not set, the filename is guessed from the URL or the `Content-Disposition` header.
|
|
441
429
|
"""
|
|
430
|
+
if expected_size is not None and resume_size == expected_size:
|
|
431
|
+
# If the file is already fully downloaded, we don't need to download it again.
|
|
432
|
+
return
|
|
433
|
+
|
|
442
434
|
hf_transfer = None
|
|
443
|
-
if HF_HUB_ENABLE_HF_TRANSFER:
|
|
435
|
+
if constants.HF_HUB_ENABLE_HF_TRANSFER:
|
|
444
436
|
if resume_size != 0:
|
|
445
437
|
warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
|
|
446
438
|
elif proxies is not None:
|
|
@@ -461,7 +453,7 @@ def http_get(
|
|
|
461
453
|
headers["Range"] = "bytes=%d-" % (resume_size,)
|
|
462
454
|
|
|
463
455
|
r = _request_wrapper(
|
|
464
|
-
method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=HF_HUB_DOWNLOAD_TIMEOUT
|
|
456
|
+
method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
|
|
465
457
|
)
|
|
466
458
|
hf_raise_for_status(r)
|
|
467
459
|
content_length = r.headers.get("Content-Length")
|
|
@@ -511,7 +503,7 @@ def http_get(
|
|
|
511
503
|
)
|
|
512
504
|
|
|
513
505
|
with progress_cm as progress:
|
|
514
|
-
if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
|
|
506
|
+
if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
|
|
515
507
|
supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
|
|
516
508
|
if not supports_callback:
|
|
517
509
|
warnings.warn(
|
|
@@ -523,8 +515,8 @@ def http_get(
|
|
|
523
515
|
hf_transfer.download(
|
|
524
516
|
url=url,
|
|
525
517
|
filename=temp_file.name,
|
|
526
|
-
max_files=HF_TRANSFER_CONCURRENCY,
|
|
527
|
-
chunk_size=DOWNLOAD_CHUNK_SIZE,
|
|
518
|
+
max_files=constants.HF_TRANSFER_CONCURRENCY,
|
|
519
|
+
chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
|
|
528
520
|
headers=headers,
|
|
529
521
|
parallel_failures=3,
|
|
530
522
|
max_retries=5,
|
|
@@ -546,7 +538,7 @@ def http_get(
|
|
|
546
538
|
return
|
|
547
539
|
new_resume_size = resume_size
|
|
548
540
|
try:
|
|
549
|
-
for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
|
|
541
|
+
for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
|
|
550
542
|
if chunk: # filter out keep-alive new chunks
|
|
551
543
|
progress.update(len(chunk))
|
|
552
544
|
temp_file.write(chunk)
|
|
@@ -594,7 +586,7 @@ def cached_download(
|
|
|
594
586
|
force_download: bool = False,
|
|
595
587
|
force_filename: Optional[str] = None,
|
|
596
588
|
proxies: Optional[Dict] = None,
|
|
597
|
-
etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
|
|
589
|
+
etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
|
|
598
590
|
resume_download: Optional[bool] = None,
|
|
599
591
|
token: Union[bool, str, None] = None,
|
|
600
592
|
local_files_only: bool = False,
|
|
@@ -672,9 +664,9 @@ def cached_download(
|
|
|
672
664
|
|
|
673
665
|
</Tip>
|
|
674
666
|
"""
|
|
675
|
-
if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
|
|
667
|
+
if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT:
|
|
676
668
|
# Respect environment variable above user value
|
|
677
|
-
etag_timeout = HF_HUB_ETAG_TIMEOUT
|
|
669
|
+
etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
|
|
678
670
|
|
|
679
671
|
if not legacy_cache_layout:
|
|
680
672
|
warnings.warn(
|
|
@@ -691,7 +683,7 @@ def cached_download(
|
|
|
691
683
|
)
|
|
692
684
|
|
|
693
685
|
if cache_dir is None:
|
|
694
|
-
cache_dir = HF_HUB_CACHE
|
|
686
|
+
cache_dir = constants.HF_HUB_CACHE
|
|
695
687
|
if isinstance(cache_dir, Path):
|
|
696
688
|
cache_dir = str(cache_dir)
|
|
697
689
|
|
|
@@ -723,7 +715,7 @@ def cached_download(
|
|
|
723
715
|
)
|
|
724
716
|
headers.pop("Accept-Encoding", None)
|
|
725
717
|
hf_raise_for_status(r)
|
|
726
|
-
etag = r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")
|
|
718
|
+
etag = r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")
|
|
727
719
|
# We favor a custom header indicating the etag of the linked resource, and
|
|
728
720
|
# we fallback to the regular etag header.
|
|
729
721
|
# If we don't have any of those, raise an error.
|
|
@@ -970,7 +962,7 @@ def repo_folder_name(*, repo_id: str, repo_type: str) -> str:
|
|
|
970
962
|
"""
|
|
971
963
|
# remove all `/` occurrences to correctly convert repo to directory name
|
|
972
964
|
parts = [f"{repo_type}s", *repo_id.split("/")]
|
|
973
|
-
return REPO_ID_SEPARATOR.join(parts)
|
|
965
|
+
return constants.REPO_ID_SEPARATOR.join(parts)
|
|
974
966
|
|
|
975
967
|
|
|
976
968
|
def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
|
|
@@ -1021,7 +1013,7 @@ def hf_hub_download(
|
|
|
1021
1013
|
user_agent: Union[Dict, str, None] = None,
|
|
1022
1014
|
force_download: bool = False,
|
|
1023
1015
|
proxies: Optional[Dict] = None,
|
|
1024
|
-
etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
|
|
1016
|
+
etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
|
|
1025
1017
|
token: Union[bool, str, None] = None,
|
|
1026
1018
|
local_files_only: bool = False,
|
|
1027
1019
|
headers: Optional[Dict[str, str]] = None,
|
|
@@ -1135,9 +1127,9 @@ def hf_hub_download(
|
|
|
1135
1127
|
If some parameter value is invalid.
|
|
1136
1128
|
|
|
1137
1129
|
"""
|
|
1138
|
-
if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
|
|
1130
|
+
if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT:
|
|
1139
1131
|
# Respect environment variable above user value
|
|
1140
|
-
etag_timeout = HF_HUB_ETAG_TIMEOUT
|
|
1132
|
+
etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
|
|
1141
1133
|
|
|
1142
1134
|
if force_filename is not None:
|
|
1143
1135
|
warnings.warn(
|
|
@@ -1180,9 +1172,9 @@ def hf_hub_download(
|
|
|
1180
1172
|
)
|
|
1181
1173
|
|
|
1182
1174
|
if cache_dir is None:
|
|
1183
|
-
cache_dir = HF_HUB_CACHE
|
|
1175
|
+
cache_dir = constants.HF_HUB_CACHE
|
|
1184
1176
|
if revision is None:
|
|
1185
|
-
revision = DEFAULT_REVISION
|
|
1177
|
+
revision = constants.DEFAULT_REVISION
|
|
1186
1178
|
if isinstance(cache_dir, Path):
|
|
1187
1179
|
cache_dir = str(cache_dir)
|
|
1188
1180
|
if isinstance(local_dir, Path):
|
|
@@ -1196,8 +1188,8 @@ def hf_hub_download(
|
|
|
1196
1188
|
|
|
1197
1189
|
if repo_type is None:
|
|
1198
1190
|
repo_type = "model"
|
|
1199
|
-
if repo_type not in REPO_TYPES:
|
|
1200
|
-
raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
|
|
1191
|
+
if repo_type not in constants.REPO_TYPES:
|
|
1192
|
+
raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
|
|
1201
1193
|
|
|
1202
1194
|
headers = build_hf_headers(
|
|
1203
1195
|
token=token,
|
|
@@ -1396,7 +1388,8 @@ def _hf_hub_download_to_cache_dir(
|
|
|
1396
1388
|
filename=filename,
|
|
1397
1389
|
force_download=force_download,
|
|
1398
1390
|
)
|
|
1399
|
-
|
|
1391
|
+
if not os.path.exists(pointer_path):
|
|
1392
|
+
_create_symlink(blob_path, pointer_path, new_blob=True)
|
|
1400
1393
|
|
|
1401
1394
|
return pointer_path
|
|
1402
1395
|
|
|
@@ -1581,10 +1574,10 @@ def try_to_load_from_cache(
|
|
|
1581
1574
|
revision = "main"
|
|
1582
1575
|
if repo_type is None:
|
|
1583
1576
|
repo_type = "model"
|
|
1584
|
-
if repo_type not in REPO_TYPES:
|
|
1585
|
-
raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
|
|
1577
|
+
if repo_type not in constants.REPO_TYPES:
|
|
1578
|
+
raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
|
|
1586
1579
|
if cache_dir is None:
|
|
1587
|
-
cache_dir = HF_HUB_CACHE
|
|
1580
|
+
cache_dir = constants.HF_HUB_CACHE
|
|
1588
1581
|
|
|
1589
1582
|
object_id = repo_id.replace("/", "--")
|
|
1590
1583
|
repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}")
|
|
@@ -1625,7 +1618,7 @@ def get_hf_file_metadata(
|
|
|
1625
1618
|
url: str,
|
|
1626
1619
|
token: Union[bool, str, None] = None,
|
|
1627
1620
|
proxies: Optional[Dict] = None,
|
|
1628
|
-
timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT,
|
|
1621
|
+
timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
|
|
1629
1622
|
library_name: Optional[str] = None,
|
|
1630
1623
|
library_version: Optional[str] = None,
|
|
1631
1624
|
user_agent: Union[Dict, str, None] = None,
|
|
@@ -1683,15 +1676,17 @@ def get_hf_file_metadata(
|
|
|
1683
1676
|
|
|
1684
1677
|
# Return
|
|
1685
1678
|
return HfFileMetadata(
|
|
1686
|
-
commit_hash=r.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT),
|
|
1679
|
+
commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
|
|
1687
1680
|
# We favor a custom header indicating the etag of the linked resource, and
|
|
1688
1681
|
# we fallback to the regular etag header.
|
|
1689
|
-
etag=_normalize_etag(r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
|
|
1682
|
+
etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
|
|
1690
1683
|
# Either from response headers (if redirected) or defaults to request url
|
|
1691
1684
|
# Do not use directly `url`, as `_request_wrapper` might have followed relative
|
|
1692
1685
|
# redirects.
|
|
1693
1686
|
location=r.headers.get("Location") or r.request.url, # type: ignore
|
|
1694
|
-
size=_int_or_none(
|
|
1687
|
+
size=_int_or_none(
|
|
1688
|
+
r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
|
|
1689
|
+
),
|
|
1695
1690
|
)
|
|
1696
1691
|
|
|
1697
1692
|
|
|
@@ -1736,7 +1731,7 @@ def _get_metadata_or_catch_error(
|
|
|
1736
1731
|
),
|
|
1737
1732
|
)
|
|
1738
1733
|
|
|
1739
|
-
url =
|
|
1734
|
+
url = hf_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint)
|
|
1740
1735
|
url_to_download: str = url
|
|
1741
1736
|
etag: Optional[str] = None
|
|
1742
1737
|
commit_hash: Optional[str] = None
|
|
@@ -1754,11 +1749,16 @@ def _get_metadata_or_catch_error(
|
|
|
1754
1749
|
except EntryNotFoundError as http_error:
|
|
1755
1750
|
if storage_folder is not None and relative_filename is not None:
|
|
1756
1751
|
# Cache the non-existence of the file
|
|
1757
|
-
commit_hash = http_error.response.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT)
|
|
1752
|
+
commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
|
|
1758
1753
|
if commit_hash is not None:
|
|
1759
1754
|
no_exist_file_path = Path(storage_folder) / ".no_exist" / commit_hash / relative_filename
|
|
1760
1755
|
no_exist_file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1761
|
-
|
|
1756
|
+
try:
|
|
1757
|
+
no_exist_file_path.touch()
|
|
1758
|
+
except OSError as e:
|
|
1759
|
+
logger.error(
|
|
1760
|
+
f"Could not cache non-existence of file. Will ignore error and continue. Error: {e}"
|
|
1761
|
+
)
|
|
1762
1762
|
_cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
|
|
1763
1763
|
raise
|
|
1764
1764
|
|
|
@@ -1888,14 +1888,14 @@ def _download_to_tmp_and_move(
|
|
|
1888
1888
|
# Do nothing if already exists (except if force_download=True)
|
|
1889
1889
|
return
|
|
1890
1890
|
|
|
1891
|
-
if incomplete_path.exists() and (force_download or (HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
|
|
1891
|
+
if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
|
|
1892
1892
|
# By default, we will try to resume the download if possible.
|
|
1893
1893
|
# However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
|
|
1894
1894
|
# not resume the download => delete the incomplete file.
|
|
1895
1895
|
message = f"Removing incomplete file '{incomplete_path}'"
|
|
1896
1896
|
if force_download:
|
|
1897
1897
|
message += " (force_download=True)"
|
|
1898
|
-
elif HF_HUB_ENABLE_HF_TRANSFER and not proxies:
|
|
1898
|
+
elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies:
|
|
1899
1899
|
message += " (hf_transfer=True)"
|
|
1900
1900
|
logger.info(message)
|
|
1901
1901
|
incomplete_path.unlink(missing_ok=True)
|