huggingface-hub 1.0.0rc2__py3-none-any.whl → 1.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of huggingface-hub has been flagged as potentially problematic.
- huggingface_hub/__init__.py +4 -7
- huggingface_hub/_login.py +2 -2
- huggingface_hub/_snapshot_download.py +119 -21
- huggingface_hub/_upload_large_folder.py +1 -2
- huggingface_hub/cli/_cli_utils.py +12 -6
- huggingface_hub/cli/download.py +32 -7
- huggingface_hub/dataclasses.py +132 -3
- huggingface_hub/errors.py +4 -0
- huggingface_hub/file_download.py +216 -17
- huggingface_hub/hf_api.py +127 -14
- huggingface_hub/hf_file_system.py +38 -21
- huggingface_hub/inference/_client.py +3 -2
- huggingface_hub/inference/_generated/_async_client.py +3 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
- huggingface_hub/inference/_mcp/mcp_client.py +4 -3
- huggingface_hub/inference/_providers/__init__.py +5 -0
- huggingface_hub/inference/_providers/_common.py +1 -0
- huggingface_hub/inference/_providers/fal_ai.py +2 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/utils/__init__.py +1 -2
- huggingface_hub/utils/_cache_manager.py +1 -1
- huggingface_hub/utils/_http.py +10 -38
- huggingface_hub/utils/_validators.py +2 -2
- {huggingface_hub-1.0.0rc2.dist-info → huggingface_hub-1.0.0rc3.dist-info}/METADATA +1 -1
- {huggingface_hub-1.0.0rc2.dist-info → huggingface_hub-1.0.0rc3.dist-info}/RECORD +29 -43
- {huggingface_hub-1.0.0rc2.dist-info → huggingface_hub-1.0.0rc3.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/_cli_utils.py +0 -74
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -195
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -207
- huggingface_hub/commands/version.py +0 -40
- {huggingface_hub-1.0.0rc2.dist-info → huggingface_hub-1.0.0rc3.dist-info}/LICENSE +0 -0
- {huggingface_hub-1.0.0rc2.dist-info → huggingface_hub-1.0.0rc3.dist-info}/WHEEL +0 -0
- {huggingface_hub-1.0.0rc2.dist-info → huggingface_hub-1.0.0rc3.dist-info}/top_level.txt +0 -0
huggingface_hub/file_download.py
CHANGED

````diff
@@ -9,7 +9,7 @@ import uuid
 import warnings
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, BinaryIO, NoReturn, Optional, Union
+from typing import Any, BinaryIO, Literal, NoReturn, Optional, Union, overload
 from urllib.parse import quote, urlparse
 
 import httpx
@@ -149,6 +149,34 @@ class HfFileMetadata:
     xet_file_data: Optional[XetFileData]
 
 
+@dataclass
+class DryRunFileInfo:
+    """Information returned when performing a dry run of a file download.
+
+    Returned by [`hf_hub_download`] when `dry_run=True`.
+
+    Args:
+        commit_hash (`str`):
+            The commit_hash related to the file.
+        file_size (`int`):
+            Size of the file. In case of an LFS file, contains the size of the actual LFS file, not the pointer.
+        filename (`str`):
+            Name of the file in the repo.
+        is_cached (`bool`):
+            Whether the file is already cached locally.
+        will_download (`bool`):
+            Whether the file will be downloaded if `hf_hub_download` is called with `dry_run=False`.
+            In practice, will_download is `True` if the file is not cached or if `force_download=True`.
+    """
+
+    commit_hash: str
+    file_size: int
+    filename: str
+    local_path: str
+    is_cached: bool
+    will_download: bool
+
+
 @validate_hf_hub_args
 def hf_hub_url(
     repo_id: str,
@@ -763,6 +791,75 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
     pass
 
 
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[False] = False,
+) -> str: ...
+
+
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[True] = True,
+) -> DryRunFileInfo: ...
+
+
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: bool = False,
+) -> Union[str, DryRunFileInfo]: ...
+
+
 @validate_hf_hub_args
 def hf_hub_download(
     repo_id: str,
@@ -782,7 +879,8 @@ def hf_hub_download(
     local_files_only: bool = False,
     headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-) -> str:
+    dry_run: bool = False,
+) -> Union[str, DryRunFileInfo]:
     """Download a given file if it's not already present in the local cache.
 
     The new cache file layout looks like this:
@@ -857,9 +955,14 @@ def hf_hub_download(
            local cached file if it exists.
        headers (`dict`, *optional*):
            Additional headers to be sent with the request.
+        dry_run (`bool`, *optional*, defaults to `False`):
+            If `True`, perform a dry run without actually downloading the file. Returns a
+            [`DryRunFileInfo`] object containing information about what would be downloaded.
 
    Returns:
-        `str`: Local path of file or if networking is off, last version of file cached on disk.
+        `str` or [`DryRunFileInfo`]:
+            - If `dry_run=False`: Local path of file or if networking is off, last version of file cached on disk.
+            - If `dry_run=True`: A [`DryRunFileInfo`] object containing download information.
 
    Raises:
        [`~utils.RepositoryNotFoundError`]
@@ -929,6 +1032,7 @@ def hf_hub_download(
             cache_dir=cache_dir,
             force_download=force_download,
             local_files_only=local_files_only,
+            dry_run=dry_run,
         )
     else:
         return _hf_hub_download_to_cache_dir(
@@ -947,6 +1051,7 @@ def hf_hub_download(
             # Additional options
             local_files_only=local_files_only,
             force_download=force_download,
+            dry_run=dry_run,
         )
 
 
@@ -967,7 +1072,8 @@ def _hf_hub_download_to_cache_dir(
     # Additional options
     local_files_only: bool,
     force_download: bool,
-) -> str:
+    dry_run: bool,
+) -> Union[str, DryRunFileInfo]:
     """Download a given file to a cache folder, if not already present.
 
     Method should not be called directly. Please use `hf_hub_download` instead.
@@ -987,8 +1093,18 @@ def _hf_hub_download_to_cache_dir(
     # if user provides a commit_hash and they already have the file on disk, shortcut everything.
     if REGEX_COMMIT_HASH.match(revision):
         pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
-        if os.path.exists(pointer_path) and not force_download:
-            return pointer_path
+        if os.path.exists(pointer_path):
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=revision,
+                    file_size=os.path.getsize(pointer_path),
+                    filename=filename,
+                    is_cached=True,
+                    local_path=pointer_path,
+                    will_download=force_download,
+                )
+            if not force_download:
+                return pointer_path
 
     # Try to get metadata (etag, commit_hash, url, size) from the server.
     # If we can't, a HEAD request error is returned.
@@ -1031,8 +1147,18 @@ def _hf_hub_download_to_cache_dir(
     # Return pointer file if exists
     if commit_hash is not None:
         pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
-        if os.path.exists(pointer_path) and not force_download:
-            return pointer_path
+        if os.path.exists(pointer_path):
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=commit_hash,
+                    file_size=os.path.getsize(pointer_path),
+                    filename=filename,
+                    is_cached=True,
+                    local_path=pointer_path,
+                    will_download=force_download,
+                )
+            if not force_download:
+                return pointer_path
 
     # Otherwise, raise appropriate error
     _raise_on_head_call_error(head_call_error, force_download, local_files_only)
@@ -1045,6 +1171,17 @@ def _hf_hub_download_to_cache_dir(
     blob_path = os.path.join(storage_folder, "blobs", etag)
     pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
 
+    if dry_run:
+        is_cached = os.path.exists(pointer_path) or os.path.exists(blob_path)
+        return DryRunFileInfo(
+            commit_hash=commit_hash,
+            file_size=expected_size,
+            filename=filename,
+            is_cached=is_cached,
+            local_path=pointer_path,
+            will_download=force_download or not is_cached,
+        )
+
     os.makedirs(os.path.dirname(blob_path), exist_ok=True)
     os.makedirs(os.path.dirname(pointer_path), exist_ok=True)
 
@@ -1124,7 +1261,8 @@ def _hf_hub_download_to_local_dir(
     cache_dir: str,
     force_download: bool,
     local_files_only: bool,
-) -> str:
+    dry_run: bool,
+) -> Union[str, DryRunFileInfo]:
     """Download a given file to a local folder, if not already present.
 
     Method should not be called directly. Please use `hf_hub_download` instead.
@@ -1139,13 +1277,23 @@ def _hf_hub_download_to_local_dir(
 
     # Local file exists + metadata exists + commit_hash matches => return file
     if (
-        not force_download
-        and REGEX_COMMIT_HASH.match(revision)
+        REGEX_COMMIT_HASH.match(revision)
         and paths.file_path.is_file()
         and local_metadata is not None
         and local_metadata.commit_hash == revision
     ):
-        return str(paths.file_path)
+        local_file = str(paths.file_path)
+        if dry_run:
+            return DryRunFileInfo(
+                commit_hash=revision,
+                file_size=os.path.getsize(local_file),
+                filename=filename,
+                is_cached=True,
+                local_path=local_file,
+                will_download=force_download,
+            )
+        if not force_download:
+            return local_file
 
     # Local file doesn't exist or commit_hash doesn't match => we need the etag
     (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error(
@@ -1162,11 +1310,24 @@ def _hf_hub_download_to_local_dir(
 
     if head_call_error is not None:
         # No HEAD call but local file exists => default to local file
-        if not force_download and paths.file_path.is_file():
-            logger.warning(
-                f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
-            )
-            return str(paths.file_path)
+        if paths.file_path.is_file():
+            if dry_run or not force_download:
+                logger.warning(
+                    f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
+                )
+                local_path = str(paths.file_path)
+                if dry_run and local_metadata is not None:
+                    return DryRunFileInfo(
+                        commit_hash=local_metadata.commit_hash,
+                        file_size=os.path.getsize(local_path),
+                        filename=filename,
+                        is_cached=True,
+                        local_path=local_path,
+                        will_download=force_download,
+                    )
+                if not force_download:
+                    return local_path
+
 
     # Otherwise => raise
     _raise_on_head_call_error(head_call_error, force_download, local_files_only)
@@ -1181,6 +1342,15 @@ def _hf_hub_download_to_local_dir(
     # etag matches => update metadata and return file
     if local_metadata is not None and local_metadata.etag == etag:
         write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+        if dry_run:
+            return DryRunFileInfo(
+                commit_hash=commit_hash,
+                file_size=expected_size,
+                filename=filename,
+                is_cached=True,
+                local_path=str(paths.file_path),
+                will_download=False,
+            )
         return str(paths.file_path)
 
     # metadata is outdated + etag is a sha256
@@ -1192,6 +1362,15 @@ def _hf_hub_download_to_local_dir(
             file_hash = sha_fileobj(f).hex()
         if file_hash == etag:
             write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=commit_hash,
+                    file_size=expected_size,
+                    filename=filename,
+                    is_cached=True,
+                    local_path=str(paths.file_path),
+                    will_download=False,
+                )
             return str(paths.file_path)
 
     # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
@@ -1210,8 +1389,28 @@ def _hf_hub_download_to_local_dir(
         paths.file_path.parent.mkdir(parents=True, exist_ok=True)
         shutil.copyfile(cached_path, paths.file_path)
         write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+        if dry_run:
+            return DryRunFileInfo(
+                commit_hash=commit_hash,
+                file_size=expected_size,
+                filename=filename,
+                is_cached=True,
+                local_path=str(paths.file_path),
+                will_download=False,
+            )
         return str(paths.file_path)
 
+    if dry_run:
+        is_cached = paths.file_path.is_file()
+        return DryRunFileInfo(
+            commit_hash=commit_hash,
+            file_size=expected_size,
+            filename=filename,
+            is_cached=is_cached,
+            local_path=str(paths.file_path),
+            will_download=force_download or not is_cached,
+        )
+
     # Otherwise, let's download the file!
     with WeakFileLock(paths.lock_path):
         paths.file_path.unlink(missing_ok=True)  # delete outdated file first
````
huggingface_hub/hf_api.py
CHANGED

````diff
@@ -60,7 +60,7 @@ from ._commit_api import (
     _warn_on_overwriting_operations,
 )
 from ._inference_endpoints import InferenceEndpoint, InferenceEndpointType
-from ._jobs_api import JobInfo, ScheduledJobInfo, _create_job_spec
+from ._jobs_api import JobInfo, JobSpec, ScheduledJobInfo, _create_job_spec
 from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable
 from ._upload_large_folder import upload_large_folder_internal
 from .community import (
@@ -79,7 +79,7 @@ from .errors import (
     RepositoryNotFoundError,
     RevisionNotFoundError,
 )
-from .file_download import HfFileMetadata, get_hf_file_metadata, hf_hub_url
+from .file_download import DryRunFileInfo, HfFileMetadata, get_hf_file_metadata, hf_hub_url
 from .repocard_data import DatasetCardData, ModelCardData, SpaceCardData
 from .utils import (
     DEFAULT_IGNORE_PATTERNS,
@@ -466,11 +466,15 @@ class WebhookWatchedItem:
 class WebhookInfo:
     """Data structure containing information about a webhook.
 
+    One of `url` or `job` is specified, but not both.
+
     Attributes:
         id (`str`):
             ID of the webhook.
-        url (`str`):
+        url (`str`, *optional*):
             URL of the webhook.
+        job (`JobSpec`, *optional*):
+            Specifications of the Job to trigger.
         watched (`list[WebhookWatchedItem]`):
             List of items watched by the webhook, see [`WebhookWatchedItem`].
         domains (`list[WEBHOOK_DOMAIN_T]`):
@@ -482,7 +486,8 @@ class WebhookInfo:
     """
 
     id: str
-    url: str
+    url: Optional[str]
+    job: Optional[JobSpec]
     watched: list[WebhookWatchedItem]
     domains: list[constants.WEBHOOK_DOMAIN_T]
     secret: Optional[str]
@@ -5110,6 +5115,42 @@ class HfApi:
             endpoint=self.endpoint,
         )
 
+    @overload
+    def hf_hub_download(
+        self,
+        repo_id: str,
+        filename: str,
+        *,
+        subfolder: Optional[str] = None,
+        repo_type: Optional[str] = None,
+        revision: Optional[str] = None,
+        cache_dir: Union[str, Path, None] = None,
+        local_dir: Union[str, Path, None] = None,
+        force_download: bool = False,
+        etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+        token: Union[bool, str, None] = None,
+        local_files_only: bool = False,
+        dry_run: Literal[False] = False,
+    ) -> str: ...
+
+    @overload
+    def hf_hub_download(
+        self,
+        repo_id: str,
+        filename: str,
+        *,
+        subfolder: Optional[str] = None,
+        repo_type: Optional[str] = None,
+        revision: Optional[str] = None,
+        cache_dir: Union[str, Path, None] = None,
+        local_dir: Union[str, Path, None] = None,
+        force_download: bool = False,
+        etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+        token: Union[bool, str, None] = None,
+        local_files_only: bool = False,
+        dry_run: Literal[True],
+    ) -> DryRunFileInfo: ...
+
     @validate_hf_hub_args
     def hf_hub_download(
         self,
@@ -5125,7 +5166,8 @@ class HfApi:
         etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
         token: Union[bool, str, None] = None,
         local_files_only: bool = False,
-    ) -> str:
+        dry_run: bool = False,
+    ) -> Union[str, DryRunFileInfo]:
         """Download a given file if it's not already present in the local cache.
 
         The new cache file layout looks like this:
@@ -5192,9 +5234,14 @@ class HfApi:
            local_files_only (`bool`, *optional*, defaults to `False`):
                If `True`, avoid downloading the file and return the path to the
                local cached file if it exists.
+            dry_run (`bool`, *optional*, defaults to `False`):
+                If `True`, perform a dry run without actually downloading the file. Returns a
+                [`DryRunFileInfo`] object containing information about what would be downloaded.
 
        Returns:
-            `str`: Local path of file or if networking is off, last version of file cached on disk.
+            `str` or [`DryRunFileInfo`]:
+                - If `dry_run=False`: Local path of file or if networking is off, last version of file cached on disk.
+                - If `dry_run=True`: A [`DryRunFileInfo`] object containing download information.
 
        Raises:
            [`~utils.RepositoryNotFoundError`]
@@ -8815,6 +8862,7 @@ class HfApi:
        >>> print(webhook)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
+           job=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            secret="my-secret",
@@ -8834,7 +8882,8 @@ class HfApi:
 
         webhook = WebhookInfo(
             id=webhook_data["id"],
-            url=webhook_data["url"],
+            url=webhook_data.get("url"),
+            job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
             watched=watched_items,
             domains=webhook_data["domains"],
             secret=webhook_data.get("secret"),
@@ -8884,7 +8933,8 @@ class HfApi:
         return [
             WebhookInfo(
                 id=webhook["id"],
-                url=webhook["url"],
+                url=webhook.get("url"),
+                job=JobSpec(**webhook["job"]) if webhook.get("job") else None,
                 watched=[WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook["watched"]],
                 domains=webhook["domains"],
                 secret=webhook.get("secret"),
@@ -8897,7 +8947,8 @@ class HfApi:
     def create_webhook(
         self,
         *,
-        url: str,
+        url: Optional[str] = None,
+        job_id: Optional[str] = None,
         watched: list[Union[dict, WebhookWatchedItem]],
         domains: Optional[list[constants.WEBHOOK_DOMAIN_T]] = None,
         secret: Optional[str] = None,
@@ -8905,9 +8956,15 @@ class HfApi:
     ) -> WebhookInfo:
         """Create a new webhook.
 
+        The webhook can either send a payload to a URL, or trigger a Job to run on Hugging Face infrastructure.
+        This function should be called with one of `url` or `job_id`, but not both.
+
         Args:
             url (`str`):
                 URL to send the payload to.
+            job_id (`str`):
+                ID of the source Job to trigger with the webhook payload in the environment variable WEBHOOK_PAYLOAD.
+                Additional environment variables are available for convenience: WEBHOOK_REPO_ID, WEBHOOK_REPO_TYPE and WEBHOOK_SECRET.
             watched (`list[WebhookWatchedItem]`):
                 List of [`WebhookWatchedItem`] to be watched by the webhook. It can be users, orgs, models, datasets or spaces.
                 Watched items can also be provided as plain dictionaries.
@@ -8925,6 +8982,8 @@ class HfApi:
                 Info about the newly created webhook.
 
         Example:
+
+        Create a webhook that sends a payload to a URL
            ```python
            >>> from huggingface_hub import create_webhook
            >>> payload = create_webhook(
@@ -8937,6 +8996,43 @@ class HfApi:
            WebhookInfo(
                id="654bbbc16f2ec14d77f109cc",
                url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
+               job=None,
+               watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
+               domains=["repo", "discussion"],
+               secret="my-secret",
+               disabled=False,
+           )
+           ```
+
+        Run a Job and then create a webhook that triggers this Job
+           ```python
+           >>> from huggingface_hub import create_webhook, run_job
+           >>> job = run_job(
+           ...     image="ubuntu",
+           ...     command=["bash", "-c", r"echo An event occured in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD"],
+           ... )
+           >>> payload = create_webhook(
+           ...     watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}],
+           ...     job_id=job.id,
+           ...     domains=["repo", "discussion"],
+           ...     secret="my-secret",
+           ... )
+           >>> print(payload)
+           WebhookInfo(
+               id="654bbbc16f2ec14d77f109cc",
+               url=None,
+               job=JobSpec(
+                   docker_image='ubuntu',
+                   space_id=None,
+                   command=['bash', '-c', 'echo An event occured in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD'],
+                   arguments=[],
+                   environment={},
+                   secrets=[],
+                   flavor='cpu-basic',
+                   timeout=None,
+                   tags=None,
+                   arch=None
+               ),
                watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
                domains=["repo", "discussion"],
                secret="my-secret",
@@ -8946,9 +9042,19 @@ class HfApi:
         """
         watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched]
 
+        post_webhooks_json = {"watched": watched_dicts, "domains": domains, "secret": secret}
+        if url is not None and job_id is not None:
+            raise ValueError("Set `url` or `job_id` but not both.")
+        elif url is not None:
+            post_webhooks_json["url"] = url
+        elif job_id is not None:
+            post_webhooks_json["jobSourceId"] = job_id
+        else:
+            raise ValueError("Missing argument for webhook: `url` or `job_id`.")
+
         response = get_session().post(
             f"{constants.ENDPOINT}/api/settings/webhooks",
-            json={"watched": watched_dicts, "url": url, "domains": domains, "secret": secret},
+            json=post_webhooks_json,
             headers=self._build_hf_headers(token=token),
         )
         hf_raise_for_status(response)
@@ -8957,7 +9063,8 @@ class HfApi:
 
         webhook = WebhookInfo(
             id=webhook_data["id"],
-            url=webhook_data["url"],
+            url=webhook_data.get("url"),
+            job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
             watched=watched_items,
             domains=webhook_data["domains"],
             secret=webhook_data.get("secret"),
@@ -9013,6 +9120,7 @@ class HfApi:
        >>> print(updated_payload)
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
+           job=None,
            url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo"],
@@ -9036,7 +9144,8 @@ class HfApi:
 
         webhook = WebhookInfo(
             id=webhook_data["id"],
-            url=webhook_data["url"],
+            url=webhook_data.get("url"),
+            job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
             watched=watched_items,
             domains=webhook_data["domains"],
             secret=webhook_data.get("secret"),
@@ -9068,6 +9177,7 @@ class HfApi:
        >>> enabled_webhook
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
+           job=None,
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
@@ -9087,7 +9197,8 @@ class HfApi:
 
         webhook = WebhookInfo(
             id=webhook_data["id"],
-            url=webhook_data["url"],
+            url=webhook_data.get("url"),
+            job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
             watched=watched_items,
             domains=webhook_data["domains"],
             secret=webhook_data.get("secret"),
@@ -9120,6 +9231,7 @@ class HfApi:
        WebhookInfo(
            id="654bbbc16f2ec14d77f109cc",
            url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548",
+           jon=None,
            watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")],
            domains=["repo", "discussion"],
            secret="my-secret",
@@ -9138,7 +9250,8 @@ class HfApi:
 
         webhook = WebhookInfo(
             id=webhook_data["id"],
-            url=webhook_data["url"],
+            url=webhook_data.get("url"),
+            job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None,
             watched=watched_items,
             domains=webhook_data["domains"],
             secret=webhook_data.get("secret"),
````