huggingface-hub 1.0.0rc1__py3-none-any.whl → 1.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +4 -7
- huggingface_hub/_commit_api.py +126 -66
- huggingface_hub/_commit_scheduler.py +4 -7
- huggingface_hub/_login.py +10 -16
- huggingface_hub/_snapshot_download.py +119 -21
- huggingface_hub/_tensorboard_logger.py +2 -5
- huggingface_hub/_upload_large_folder.py +1 -2
- huggingface_hub/_webhooks_server.py +8 -20
- huggingface_hub/cli/_cli_utils.py +12 -6
- huggingface_hub/cli/download.py +32 -7
- huggingface_hub/cli/repo.py +137 -5
- huggingface_hub/dataclasses.py +122 -2
- huggingface_hub/errors.py +4 -0
- huggingface_hub/fastai_utils.py +22 -32
- huggingface_hub/file_download.py +234 -38
- huggingface_hub/hf_api.py +385 -424
- huggingface_hub/hf_file_system.py +55 -65
- huggingface_hub/inference/_client.py +27 -48
- huggingface_hub/inference/_generated/_async_client.py +27 -48
- huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
- huggingface_hub/inference/_mcp/agent.py +2 -5
- huggingface_hub/inference/_mcp/mcp_client.py +6 -8
- huggingface_hub/inference/_providers/__init__.py +16 -0
- huggingface_hub/inference/_providers/_common.py +2 -0
- huggingface_hub/inference/_providers/fal_ai.py +2 -0
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +14 -8
- huggingface_hub/repocard.py +12 -16
- huggingface_hub/serialization/_base.py +3 -6
- huggingface_hub/serialization/_torch.py +16 -34
- huggingface_hub/utils/__init__.py +1 -2
- huggingface_hub/utils/_cache_manager.py +42 -72
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_http.py +37 -68
- huggingface_hub/utils/_validators.py +2 -2
- huggingface_hub/utils/logging.py +8 -11
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/METADATA +2 -2
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/RECORD +44 -56
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/_cli_utils.py +0 -74
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -195
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -207
- huggingface_hub/commands/version.py +0 -40
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/LICENSE +0 -0
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/WHEEL +0 -0
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/top_level.txt +0 -0
huggingface_hub/file_download.py
CHANGED
|
@@ -9,7 +9,7 @@ import uuid
|
|
|
9
9
|
import warnings
|
|
10
10
|
from dataclasses import dataclass
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import Any, BinaryIO, NoReturn, Optional, Union
|
|
12
|
+
from typing import Any, BinaryIO, Literal, NoReturn, Optional, Union, overload
|
|
13
13
|
from urllib.parse import quote, urlparse
|
|
14
14
|
|
|
15
15
|
import httpx
|
|
@@ -149,6 +149,34 @@ class HfFileMetadata:
|
|
|
149
149
|
xet_file_data: Optional[XetFileData]
|
|
150
150
|
|
|
151
151
|
|
|
152
|
+
@dataclass
|
|
153
|
+
class DryRunFileInfo:
|
|
154
|
+
"""Information returned when performing a dry run of a file download.
|
|
155
|
+
|
|
156
|
+
Returned by [`hf_hub_download`] when `dry_run=True`.
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
commit_hash (`str`):
|
|
160
|
+
The commit hash of the revision the file belongs to.
|
|
161
|
+
file_size (`int`):
|
|
162
|
+
Size of the file. In case of an LFS file, contains the size of the actual LFS file, not the pointer.
|
|
163
|
+
filename (`str`):
|
|
164
|
+
Name of the file in the repo.
|
|
165
|
+
is_cached (`bool`):
|
|
166
|
+
Whether the file is already cached locally.
|
|
167
|
+
will_download (`bool`):
|
|
168
|
+
Whether the file will be downloaded if `hf_hub_download` is called with `dry_run=False`.
|
|
169
|
+
In practice, will_download is `True` if the file is not cached or if `force_download=True`.
|
|
170
|
+
"""
|
|
171
|
+
|
|
172
|
+
commit_hash: str
|
|
173
|
+
file_size: int
|
|
174
|
+
filename: str
|
|
175
|
+
local_path: str
|
|
176
|
+
is_cached: bool
|
|
177
|
+
will_download: bool
|
|
178
|
+
|
|
179
|
+
|
|
152
180
|
@validate_hf_hub_args
|
|
153
181
|
def hf_hub_url(
|
|
154
182
|
repo_id: str,
|
|
@@ -191,26 +219,23 @@ def hf_hub_url(
|
|
|
191
219
|
'https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin'
|
|
192
220
|
```
|
|
193
221
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
if stored in git, or its sha256 if stored in git-lfs.
|
|
212
|
-
|
|
213
|
-
</Tip>
|
|
222
|
+
> [!TIP]
|
|
223
|
+
> Notes:
|
|
224
|
+
>
|
|
225
|
+
> Cloudfront is replicated over the globe so downloads are way faster for
|
|
226
|
+
> the end user (and it also lowers our bandwidth costs).
|
|
227
|
+
>
|
|
228
|
+
> Cloudfront aggressively caches files by default (default TTL is 24
|
|
229
|
+
> hours), however this is not an issue here because we implement a
|
|
230
|
+
> git-based versioning system on huggingface.co, which means that we store
|
|
231
|
+
> the files on S3/Cloudfront in a content-addressable way (i.e., the file
|
|
232
|
+
> name is its hash). Using content-addressable filenames means cache can't
|
|
233
|
+
> ever be stale.
|
|
234
|
+
>
|
|
235
|
+
> In terms of client-side caching from this library, we base our caching
|
|
236
|
+
> on the objects' entity tag (`ETag`), which is an identifier of a
|
|
237
|
+
> specific version of a resource [1]_. An object's ETag is: its git-sha1
|
|
238
|
+
> if stored in git, or its sha256 if stored in git-lfs.
|
|
214
239
|
|
|
215
240
|
References:
|
|
216
241
|
|
|
@@ -243,7 +268,7 @@ def _httpx_follow_relative_redirects(method: HTTP_METHOD_T, url: str, **httpx_kw
|
|
|
243
268
|
|
|
244
269
|
This is useful to follow a redirection to a renamed repository without following redirection to a CDN.
|
|
245
270
|
|
|
246
|
-
A backoff mechanism retries the HTTP call on
|
|
271
|
+
A backoff mechanism retries the HTTP call on 5xx errors and network errors.
|
|
247
272
|
|
|
248
273
|
Args:
|
|
249
274
|
method (`str`):
|
|
@@ -766,6 +791,75 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
|
|
|
766
791
|
pass
|
|
767
792
|
|
|
768
793
|
|
|
794
|
+
@overload
|
|
795
|
+
def hf_hub_download(
|
|
796
|
+
repo_id: str,
|
|
797
|
+
filename: str,
|
|
798
|
+
*,
|
|
799
|
+
subfolder: Optional[str] = None,
|
|
800
|
+
repo_type: Optional[str] = None,
|
|
801
|
+
revision: Optional[str] = None,
|
|
802
|
+
library_name: Optional[str] = None,
|
|
803
|
+
library_version: Optional[str] = None,
|
|
804
|
+
cache_dir: Union[str, Path, None] = None,
|
|
805
|
+
local_dir: Union[str, Path, None] = None,
|
|
806
|
+
user_agent: Union[dict, str, None] = None,
|
|
807
|
+
force_download: bool = False,
|
|
808
|
+
etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
|
|
809
|
+
token: Union[bool, str, None] = None,
|
|
810
|
+
local_files_only: bool = False,
|
|
811
|
+
headers: Optional[dict[str, str]] = None,
|
|
812
|
+
endpoint: Optional[str] = None,
|
|
813
|
+
dry_run: Literal[False] = False,
|
|
814
|
+
) -> str: ...
|
|
815
|
+
|
|
816
|
+
|
|
817
|
+
@overload
|
|
818
|
+
def hf_hub_download(
|
|
819
|
+
repo_id: str,
|
|
820
|
+
filename: str,
|
|
821
|
+
*,
|
|
822
|
+
subfolder: Optional[str] = None,
|
|
823
|
+
repo_type: Optional[str] = None,
|
|
824
|
+
revision: Optional[str] = None,
|
|
825
|
+
library_name: Optional[str] = None,
|
|
826
|
+
library_version: Optional[str] = None,
|
|
827
|
+
cache_dir: Union[str, Path, None] = None,
|
|
828
|
+
local_dir: Union[str, Path, None] = None,
|
|
829
|
+
user_agent: Union[dict, str, None] = None,
|
|
830
|
+
force_download: bool = False,
|
|
831
|
+
etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
|
|
832
|
+
token: Union[bool, str, None] = None,
|
|
833
|
+
local_files_only: bool = False,
|
|
834
|
+
headers: Optional[dict[str, str]] = None,
|
|
835
|
+
endpoint: Optional[str] = None,
|
|
836
|
+
dry_run: Literal[True] = True,
|
|
837
|
+
) -> DryRunFileInfo: ...
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
@overload
|
|
841
|
+
def hf_hub_download(
|
|
842
|
+
repo_id: str,
|
|
843
|
+
filename: str,
|
|
844
|
+
*,
|
|
845
|
+
subfolder: Optional[str] = None,
|
|
846
|
+
repo_type: Optional[str] = None,
|
|
847
|
+
revision: Optional[str] = None,
|
|
848
|
+
library_name: Optional[str] = None,
|
|
849
|
+
library_version: Optional[str] = None,
|
|
850
|
+
cache_dir: Union[str, Path, None] = None,
|
|
851
|
+
local_dir: Union[str, Path, None] = None,
|
|
852
|
+
user_agent: Union[dict, str, None] = None,
|
|
853
|
+
force_download: bool = False,
|
|
854
|
+
etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
|
|
855
|
+
token: Union[bool, str, None] = None,
|
|
856
|
+
local_files_only: bool = False,
|
|
857
|
+
headers: Optional[dict[str, str]] = None,
|
|
858
|
+
endpoint: Optional[str] = None,
|
|
859
|
+
dry_run: bool = False,
|
|
860
|
+
) -> Union[str, DryRunFileInfo]: ...
|
|
861
|
+
|
|
862
|
+
|
|
769
863
|
@validate_hf_hub_args
|
|
770
864
|
def hf_hub_download(
|
|
771
865
|
repo_id: str,
|
|
@@ -785,7 +879,8 @@ def hf_hub_download(
|
|
|
785
879
|
local_files_only: bool = False,
|
|
786
880
|
headers: Optional[dict[str, str]] = None,
|
|
787
881
|
endpoint: Optional[str] = None,
|
|
788
|
-
|
|
882
|
+
dry_run: bool = False,
|
|
883
|
+
) -> Union[str, DryRunFileInfo]:
|
|
789
884
|
"""Download a given file if it's not already present in the local cache.
|
|
790
885
|
|
|
791
886
|
The new cache file layout looks like this:
|
|
@@ -860,9 +955,14 @@ def hf_hub_download(
|
|
|
860
955
|
local cached file if it exists.
|
|
861
956
|
headers (`dict`, *optional*):
|
|
862
957
|
Additional headers to be sent with the request.
|
|
958
|
+
dry_run (`bool`, *optional*, defaults to `False`):
|
|
959
|
+
If `True`, perform a dry run without actually downloading the file. Returns a
|
|
960
|
+
[`DryRunFileInfo`] object containing information about what would be downloaded.
|
|
863
961
|
|
|
864
962
|
Returns:
|
|
865
|
-
`str
|
|
963
|
+
`str` or [`DryRunFileInfo`]:
|
|
964
|
+
- If `dry_run=False`: Local path of the file or, if networking is off, the last version of the file cached on disk.
|
|
965
|
+
- If `dry_run=True`: A [`DryRunFileInfo`] object containing download information.
|
|
866
966
|
|
|
867
967
|
Raises:
|
|
868
968
|
[`~utils.RepositoryNotFoundError`]
|
|
@@ -932,6 +1032,7 @@ def hf_hub_download(
|
|
|
932
1032
|
cache_dir=cache_dir,
|
|
933
1033
|
force_download=force_download,
|
|
934
1034
|
local_files_only=local_files_only,
|
|
1035
|
+
dry_run=dry_run,
|
|
935
1036
|
)
|
|
936
1037
|
else:
|
|
937
1038
|
return _hf_hub_download_to_cache_dir(
|
|
@@ -950,6 +1051,7 @@ def hf_hub_download(
|
|
|
950
1051
|
# Additional options
|
|
951
1052
|
local_files_only=local_files_only,
|
|
952
1053
|
force_download=force_download,
|
|
1054
|
+
dry_run=dry_run,
|
|
953
1055
|
)
|
|
954
1056
|
|
|
955
1057
|
|
|
@@ -970,7 +1072,8 @@ def _hf_hub_download_to_cache_dir(
|
|
|
970
1072
|
# Additional options
|
|
971
1073
|
local_files_only: bool,
|
|
972
1074
|
force_download: bool,
|
|
973
|
-
|
|
1075
|
+
dry_run: bool,
|
|
1076
|
+
) -> Union[str, DryRunFileInfo]:
|
|
974
1077
|
"""Download a given file to a cache folder, if not already present.
|
|
975
1078
|
|
|
976
1079
|
Method should not be called directly. Please use `hf_hub_download` instead.
|
|
@@ -990,8 +1093,18 @@ def _hf_hub_download_to_cache_dir(
|
|
|
990
1093
|
# if user provides a commit_hash and they already have the file on disk, shortcut everything.
|
|
991
1094
|
if REGEX_COMMIT_HASH.match(revision):
|
|
992
1095
|
pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
|
|
993
|
-
if os.path.exists(pointer_path)
|
|
994
|
-
|
|
1096
|
+
if os.path.exists(pointer_path):
|
|
1097
|
+
if dry_run:
|
|
1098
|
+
return DryRunFileInfo(
|
|
1099
|
+
commit_hash=revision,
|
|
1100
|
+
file_size=os.path.getsize(pointer_path),
|
|
1101
|
+
filename=filename,
|
|
1102
|
+
is_cached=True,
|
|
1103
|
+
local_path=pointer_path,
|
|
1104
|
+
will_download=force_download,
|
|
1105
|
+
)
|
|
1106
|
+
if not force_download:
|
|
1107
|
+
return pointer_path
|
|
995
1108
|
|
|
996
1109
|
# Try to get metadata (etag, commit_hash, url, size) from the server.
|
|
997
1110
|
# If we can't, a HEAD request error is returned.
|
|
@@ -1034,8 +1147,18 @@ def _hf_hub_download_to_cache_dir(
|
|
|
1034
1147
|
# Return pointer file if exists
|
|
1035
1148
|
if commit_hash is not None:
|
|
1036
1149
|
pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
|
|
1037
|
-
if os.path.exists(pointer_path)
|
|
1038
|
-
|
|
1150
|
+
if os.path.exists(pointer_path):
|
|
1151
|
+
if dry_run:
|
|
1152
|
+
return DryRunFileInfo(
|
|
1153
|
+
commit_hash=commit_hash,
|
|
1154
|
+
file_size=os.path.getsize(pointer_path),
|
|
1155
|
+
filename=filename,
|
|
1156
|
+
is_cached=True,
|
|
1157
|
+
local_path=pointer_path,
|
|
1158
|
+
will_download=force_download,
|
|
1159
|
+
)
|
|
1160
|
+
if not force_download:
|
|
1161
|
+
return pointer_path
|
|
1039
1162
|
|
|
1040
1163
|
# Otherwise, raise appropriate error
|
|
1041
1164
|
_raise_on_head_call_error(head_call_error, force_download, local_files_only)
|
|
@@ -1048,6 +1171,17 @@ def _hf_hub_download_to_cache_dir(
|
|
|
1048
1171
|
blob_path = os.path.join(storage_folder, "blobs", etag)
|
|
1049
1172
|
pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
|
|
1050
1173
|
|
|
1174
|
+
if dry_run:
|
|
1175
|
+
is_cached = os.path.exists(pointer_path) or os.path.exists(blob_path)
|
|
1176
|
+
return DryRunFileInfo(
|
|
1177
|
+
commit_hash=commit_hash,
|
|
1178
|
+
file_size=expected_size,
|
|
1179
|
+
filename=filename,
|
|
1180
|
+
is_cached=is_cached,
|
|
1181
|
+
local_path=pointer_path,
|
|
1182
|
+
will_download=force_download or not is_cached,
|
|
1183
|
+
)
|
|
1184
|
+
|
|
1051
1185
|
os.makedirs(os.path.dirname(blob_path), exist_ok=True)
|
|
1052
1186
|
os.makedirs(os.path.dirname(pointer_path), exist_ok=True)
|
|
1053
1187
|
|
|
@@ -1127,7 +1261,8 @@ def _hf_hub_download_to_local_dir(
|
|
|
1127
1261
|
cache_dir: str,
|
|
1128
1262
|
force_download: bool,
|
|
1129
1263
|
local_files_only: bool,
|
|
1130
|
-
|
|
1264
|
+
dry_run: bool,
|
|
1265
|
+
) -> Union[str, DryRunFileInfo]:
|
|
1131
1266
|
"""Download a given file to a local folder, if not already present.
|
|
1132
1267
|
|
|
1133
1268
|
Method should not be called directly. Please use `hf_hub_download` instead.
|
|
@@ -1142,13 +1277,23 @@ def _hf_hub_download_to_local_dir(
|
|
|
1142
1277
|
|
|
1143
1278
|
# Local file exists + metadata exists + commit_hash matches => return file
|
|
1144
1279
|
if (
|
|
1145
|
-
|
|
1146
|
-
and REGEX_COMMIT_HASH.match(revision)
|
|
1280
|
+
REGEX_COMMIT_HASH.match(revision)
|
|
1147
1281
|
and paths.file_path.is_file()
|
|
1148
1282
|
and local_metadata is not None
|
|
1149
1283
|
and local_metadata.commit_hash == revision
|
|
1150
1284
|
):
|
|
1151
|
-
|
|
1285
|
+
local_file = str(paths.file_path)
|
|
1286
|
+
if dry_run:
|
|
1287
|
+
return DryRunFileInfo(
|
|
1288
|
+
commit_hash=revision,
|
|
1289
|
+
file_size=os.path.getsize(local_file),
|
|
1290
|
+
filename=filename,
|
|
1291
|
+
is_cached=True,
|
|
1292
|
+
local_path=local_file,
|
|
1293
|
+
will_download=force_download,
|
|
1294
|
+
)
|
|
1295
|
+
if not force_download:
|
|
1296
|
+
return local_file
|
|
1152
1297
|
|
|
1153
1298
|
# Local file doesn't exist or commit_hash doesn't match => we need the etag
|
|
1154
1299
|
(url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error(
|
|
@@ -1165,11 +1310,24 @@ def _hf_hub_download_to_local_dir(
|
|
|
1165
1310
|
|
|
1166
1311
|
if head_call_error is not None:
|
|
1167
1312
|
# HEAD call failed but local file exists => default to local file
|
|
1168
|
-
if
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1313
|
+
if paths.file_path.is_file():
|
|
1314
|
+
if dry_run or not force_download:
|
|
1315
|
+
logger.warning(
|
|
1316
|
+
f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
|
|
1317
|
+
)
|
|
1318
|
+
local_path = str(paths.file_path)
|
|
1319
|
+
if dry_run and local_metadata is not None:
|
|
1320
|
+
return DryRunFileInfo(
|
|
1321
|
+
commit_hash=local_metadata.commit_hash,
|
|
1322
|
+
file_size=os.path.getsize(local_path),
|
|
1323
|
+
filename=filename,
|
|
1324
|
+
is_cached=True,
|
|
1325
|
+
local_path=local_path,
|
|
1326
|
+
will_download=force_download,
|
|
1327
|
+
)
|
|
1328
|
+
if not force_download:
|
|
1329
|
+
return local_path
|
|
1330
|
+
|
|
1173
1331
|
# Otherwise => raise
|
|
1174
1332
|
_raise_on_head_call_error(head_call_error, force_download, local_files_only)
|
|
1175
1333
|
|
|
@@ -1184,6 +1342,15 @@ def _hf_hub_download_to_local_dir(
|
|
|
1184
1342
|
# etag matches => update metadata and return file
|
|
1185
1343
|
if local_metadata is not None and local_metadata.etag == etag:
|
|
1186
1344
|
write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
|
|
1345
|
+
if dry_run:
|
|
1346
|
+
return DryRunFileInfo(
|
|
1347
|
+
commit_hash=commit_hash,
|
|
1348
|
+
file_size=expected_size,
|
|
1349
|
+
filename=filename,
|
|
1350
|
+
is_cached=True,
|
|
1351
|
+
local_path=str(paths.file_path),
|
|
1352
|
+
will_download=False,
|
|
1353
|
+
)
|
|
1187
1354
|
return str(paths.file_path)
|
|
1188
1355
|
|
|
1189
1356
|
# metadata is outdated + etag is a sha256
|
|
@@ -1195,6 +1362,15 @@ def _hf_hub_download_to_local_dir(
|
|
|
1195
1362
|
file_hash = sha_fileobj(f).hex()
|
|
1196
1363
|
if file_hash == etag:
|
|
1197
1364
|
write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
|
|
1365
|
+
if dry_run:
|
|
1366
|
+
return DryRunFileInfo(
|
|
1367
|
+
commit_hash=commit_hash,
|
|
1368
|
+
file_size=expected_size,
|
|
1369
|
+
filename=filename,
|
|
1370
|
+
is_cached=True,
|
|
1371
|
+
local_path=str(paths.file_path),
|
|
1372
|
+
will_download=False,
|
|
1373
|
+
)
|
|
1198
1374
|
return str(paths.file_path)
|
|
1199
1375
|
|
|
1200
1376
|
# Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
|
|
@@ -1213,8 +1389,28 @@ def _hf_hub_download_to_local_dir(
|
|
|
1213
1389
|
paths.file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1214
1390
|
shutil.copyfile(cached_path, paths.file_path)
|
|
1215
1391
|
write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
|
|
1392
|
+
if dry_run:
|
|
1393
|
+
return DryRunFileInfo(
|
|
1394
|
+
commit_hash=commit_hash,
|
|
1395
|
+
file_size=expected_size,
|
|
1396
|
+
filename=filename,
|
|
1397
|
+
is_cached=True,
|
|
1398
|
+
local_path=str(paths.file_path),
|
|
1399
|
+
will_download=False,
|
|
1400
|
+
)
|
|
1216
1401
|
return str(paths.file_path)
|
|
1217
1402
|
|
|
1403
|
+
if dry_run:
|
|
1404
|
+
is_cached = paths.file_path.is_file()
|
|
1405
|
+
return DryRunFileInfo(
|
|
1406
|
+
commit_hash=commit_hash,
|
|
1407
|
+
file_size=expected_size,
|
|
1408
|
+
filename=filename,
|
|
1409
|
+
is_cached=is_cached,
|
|
1410
|
+
local_path=str(paths.file_path),
|
|
1411
|
+
will_download=force_download or not is_cached,
|
|
1412
|
+
)
|
|
1413
|
+
|
|
1218
1414
|
# Otherwise, let's download the file!
|
|
1219
1415
|
with WeakFileLock(paths.lock_path):
|
|
1220
1416
|
paths.file_path.unlink(missing_ok=True) # delete outdated file first
|