huggingface-hub 1.0.0rc1__py3-none-any.whl → 1.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of huggingface-hub was flagged as possibly problematic by the registry.
Files changed (59)
  1. huggingface_hub/__init__.py +4 -7
  2. huggingface_hub/_commit_api.py +126 -66
  3. huggingface_hub/_commit_scheduler.py +4 -7
  4. huggingface_hub/_login.py +10 -16
  5. huggingface_hub/_snapshot_download.py +119 -21
  6. huggingface_hub/_tensorboard_logger.py +2 -5
  7. huggingface_hub/_upload_large_folder.py +1 -2
  8. huggingface_hub/_webhooks_server.py +8 -20
  9. huggingface_hub/cli/_cli_utils.py +12 -6
  10. huggingface_hub/cli/download.py +32 -7
  11. huggingface_hub/cli/repo.py +137 -5
  12. huggingface_hub/dataclasses.py +122 -2
  13. huggingface_hub/errors.py +4 -0
  14. huggingface_hub/fastai_utils.py +22 -32
  15. huggingface_hub/file_download.py +234 -38
  16. huggingface_hub/hf_api.py +385 -424
  17. huggingface_hub/hf_file_system.py +55 -65
  18. huggingface_hub/inference/_client.py +27 -48
  19. huggingface_hub/inference/_generated/_async_client.py +27 -48
  20. huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
  21. huggingface_hub/inference/_mcp/agent.py +2 -5
  22. huggingface_hub/inference/_mcp/mcp_client.py +6 -8
  23. huggingface_hub/inference/_providers/__init__.py +16 -0
  24. huggingface_hub/inference/_providers/_common.py +2 -0
  25. huggingface_hub/inference/_providers/fal_ai.py +2 -0
  26. huggingface_hub/inference/_providers/publicai.py +6 -0
  27. huggingface_hub/inference/_providers/scaleway.py +28 -0
  28. huggingface_hub/inference/_providers/zai_org.py +17 -0
  29. huggingface_hub/lfs.py +14 -8
  30. huggingface_hub/repocard.py +12 -16
  31. huggingface_hub/serialization/_base.py +3 -6
  32. huggingface_hub/serialization/_torch.py +16 -34
  33. huggingface_hub/utils/__init__.py +1 -2
  34. huggingface_hub/utils/_cache_manager.py +42 -72
  35. huggingface_hub/utils/_chunk_utils.py +2 -3
  36. huggingface_hub/utils/_http.py +37 -68
  37. huggingface_hub/utils/_validators.py +2 -2
  38. huggingface_hub/utils/logging.py +8 -11
  39. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/METADATA +2 -2
  40. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/RECORD +44 -56
  41. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/entry_points.txt +0 -1
  42. huggingface_hub/commands/__init__.py +0 -27
  43. huggingface_hub/commands/_cli_utils.py +0 -74
  44. huggingface_hub/commands/delete_cache.py +0 -476
  45. huggingface_hub/commands/download.py +0 -195
  46. huggingface_hub/commands/env.py +0 -39
  47. huggingface_hub/commands/huggingface_cli.py +0 -65
  48. huggingface_hub/commands/lfs.py +0 -200
  49. huggingface_hub/commands/repo.py +0 -151
  50. huggingface_hub/commands/repo_files.py +0 -132
  51. huggingface_hub/commands/scan_cache.py +0 -183
  52. huggingface_hub/commands/tag.py +0 -159
  53. huggingface_hub/commands/upload.py +0 -318
  54. huggingface_hub/commands/upload_large_folder.py +0 -131
  55. huggingface_hub/commands/user.py +0 -207
  56. huggingface_hub/commands/version.py +0 -40
  57. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/LICENSE +0 -0
  58. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/WHEEL +0 -0
  59. {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/top_level.txt +0 -0
huggingface_hub/file_download.py
@@ -9,7 +9,7 @@ import uuid
 import warnings
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, BinaryIO, NoReturn, Optional, Union
+from typing import Any, BinaryIO, Literal, NoReturn, Optional, Union, overload
 from urllib.parse import quote, urlparse
 
 import httpx
@@ -149,6 +149,34 @@ class HfFileMetadata:
     xet_file_data: Optional[XetFileData]
 
 
+@dataclass
+class DryRunFileInfo:
+    """Information returned when performing a dry run of a file download.
+
+    Returned by [`hf_hub_download`] when `dry_run=True`.
+
+    Args:
+        commit_hash (`str`):
+            The commit_hash related to the file.
+        file_size (`int`):
+            Size of the file. In case of an LFS file, contains the size of the actual LFS file, not the pointer.
+        filename (`str`):
+            Name of the file in the repo.
+        is_cached (`bool`):
+            Whether the file is already cached locally.
+        will_download (`bool`):
+            Whether the file will be downloaded if `hf_hub_download` is called with `dry_run=False`.
+            In practice, will_download is `True` if the file is not cached or if `force_download=True`.
+    """
+
+    commit_hash: str
+    file_size: int
+    filename: str
+    local_path: str
+    is_cached: bool
+    will_download: bool
+
+
 @validate_hf_hub_args
 def hf_hub_url(
     repo_id: str,
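The hunk above is the centerpiece of this release: a structured result object for download dry runs. A minimal usage sketch, reusing the placeholder repo from the `hf_hub_url` docstring below; the output comments are illustrative:

```python
from huggingface_hub import hf_hub_download

# Probe the download without transferring any bytes.
info = hf_hub_download("julien-c/EsperBERTo-small", "pytorch_model.bin", dry_run=True)

print(info.filename)       # "pytorch_model.bin"
print(info.commit_hash)    # commit the requested revision resolves to
print(info.file_size)      # real LFS size in bytes, not the pointer size
print(info.local_path)     # where the file lives (or would land) locally
print(info.is_cached)      # False on a cold cache
print(info.will_download)  # True when not cached or force_download=True
```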
@@ -191,26 +219,23 @@ def hf_hub_url(
     'https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin'
     ```
 
-    <Tip>
-
-    Notes:
-
-    Cloudfront is replicated over the globe so downloads are way faster for
-    the end user (and it also lowers our bandwidth costs).
-
-    Cloudfront aggressively caches files by default (default TTL is 24
-    hours), however this is not an issue here because we implement a
-    git-based versioning system on huggingface.co, which means that we store
-    the files on S3/Cloudfront in a content-addressable way (i.e., the file
-    name is its hash). Using content-addressable filenames means cache can't
-    ever be stale.
-
-    In terms of client-side caching from this library, we base our caching
-    on the objects' entity tag (`ETag`), which is an identifier of a
-    specific version of a resource [1]_. An object's ETag is: its git-sha1
-    if stored in git, or its sha256 if stored in git-lfs.
-
-    </Tip>
+    > [!TIP]
+    > Notes:
+    >
+    > Cloudfront is replicated over the globe so downloads are way faster for
+    > the end user (and it also lowers our bandwidth costs).
+    >
+    > Cloudfront aggressively caches files by default (default TTL is 24
+    > hours), however this is not an issue here because we implement a
+    > git-based versioning system on huggingface.co, which means that we store
+    > the files on S3/Cloudfront in a content-addressable way (i.e., the file
+    > name is its hash). Using content-addressable filenames means cache can't
+    > ever be stale.
+    >
+    > In terms of client-side caching from this library, we base our caching
+    > on the objects' entity tag (`ETag`), which is an identifier of a
+    > specific version of a resource [1]_. An object's ETag is: its git-sha1
+    > if stored in git, or its sha256 if stored in git-lfs.
 
     References:
 
@@ -243,7 +268,7 @@ def _httpx_follow_relative_redirects(method: HTTP_METHOD_T, url: str, **httpx_kw
 
     This is useful to follow a redirection to a renamed repository without following redirection to a CDN.
 
-    A backoff mechanism retries the HTTP call on 429, 503 and 504 errors.
+    A backoff mechanism retries the HTTP call on 5xx errors and network errors.
 
     Args:
         method (`str`):
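The retry contract is broadened here from three specific status codes (429, 503, 504) to all 5xx responses plus network errors. As an illustration only (this is not the library's internal helper, and the function name is hypothetical), a simple exponential backoff implementing that contract might look like:

```python
import time

import httpx


def get_with_backoff(url: str, max_retries: int = 5, base_delay: float = 1.0) -> httpx.Response:
    """Retry on 5xx responses and on network errors, with exponential backoff."""
    for attempt in range(max_retries):
        try:
            response = httpx.get(url)
            if response.status_code < 500:
                return response  # 2xx/3xx/4xx are returned as-is; only 5xx is retried
        except httpx.TransportError:
            pass  # network error (timeout, connection reset, ...): retry
        time.sleep(base_delay * 2**attempt)
    return httpx.get(url)  # final attempt; let any remaining error propagate
```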
@@ -766,6 +791,75 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
         pass
 
 
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[False] = False,
+) -> str: ...
+
+
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[True] = True,
+) -> DryRunFileInfo: ...
+
+
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: bool = False,
+) -> Union[str, DryRunFileInfo]: ...
+
+
 @validate_hf_hub_args
 def hf_hub_download(
     repo_id: str,
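These overloads let static type checkers narrow the return type from the literal value of `dry_run`, so existing call sites keep a plain `str` result. Roughly, assuming `DryRunFileInfo` is importable from the defining module shown in this diff (the `HF_DRY_RUN` environment variable below is a hypothetical example of a non-literal bool):

```python
import os

from huggingface_hub import hf_hub_download
from huggingface_hub.file_download import DryRunFileInfo

path = hf_hub_download("julien-c/EsperBERTo-small", "pytorch_model.bin")
# a type checker infers `str` (first overload: dry_run defaults to False)

info = hf_hub_download("julien-c/EsperBERTo-small", "pytorch_model.bin", dry_run=True)
# a type checker infers `DryRunFileInfo` (second overload)

flag = os.environ.get("HF_DRY_RUN") == "1"  # plain bool, unknown statically
result = hf_hub_download("julien-c/EsperBERTo-small", "pytorch_model.bin", dry_run=flag)
# third overload: `Union[str, DryRunFileInfo]`, so narrow at runtime
assert isinstance(result, (str, DryRunFileInfo))
```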
@@ -785,7 +879,8 @@ def hf_hub_download(
     local_files_only: bool = False,
     headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-) -> str:
+    dry_run: bool = False,
+) -> Union[str, DryRunFileInfo]:
     """Download a given file if it's not already present in the local cache.
 
     The new cache file layout looks like this:
@@ -860,9 +955,14 @@
             local cached file if it exists.
         headers (`dict`, *optional*):
             Additional headers to be sent with the request.
+        dry_run (`bool`, *optional*, defaults to `False`):
+            If `True`, perform a dry run without actually downloading the file. Returns a
+            [`DryRunFileInfo`] object containing information about what would be downloaded.
 
     Returns:
-        `str`: Local path of file or if networking is off, last version of file cached on disk.
+        `str` or [`DryRunFileInfo`]:
+            - If `dry_run=False`: Local path of file or if networking is off, last version of file cached on disk.
+            - If `dry_run=True`: A [`DryRunFileInfo`] object containing download information.
 
     Raises:
         [`~utils.RepositoryNotFoundError`]
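Since each dry-run result carries `file_size` and `will_download`, results can be aggregated to budget a multi-file fetch before committing to it. A sketch with a hypothetical file list:

```python
from huggingface_hub import hf_hub_download

files = ["config.json", "vocab.json", "pytorch_model.bin"]  # hypothetical file list
infos = [hf_hub_download("julien-c/EsperBERTo-small", name, dry_run=True) for name in files]

pending = [info for info in infos if info.will_download]
total_bytes = sum(info.file_size for info in pending)
print(f"{len(pending)} file(s) to fetch, {total_bytes / 1e6:.1f} MB")
```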
@@ -932,6 +1032,7 @@
             cache_dir=cache_dir,
             force_download=force_download,
             local_files_only=local_files_only,
+            dry_run=dry_run,
         )
     else:
         return _hf_hub_download_to_cache_dir(
@@ -950,6 +1051,7 @@
             # Additional options
             local_files_only=local_files_only,
             force_download=force_download,
+            dry_run=dry_run,
         )
 
 
@@ -970,7 +1072,8 @@ def _hf_hub_download_to_cache_dir(
     # Additional options
     local_files_only: bool,
     force_download: bool,
-) -> str:
+    dry_run: bool,
+) -> Union[str, DryRunFileInfo]:
     """Download a given file to a cache folder, if not already present.
 
     Method should not be called directly. Please use `hf_hub_download` instead.
@@ -990,8 +1093,18 @@ def _hf_hub_download_to_cache_dir(
     # if user provides a commit_hash and they already have the file on disk, shortcut everything.
     if REGEX_COMMIT_HASH.match(revision):
         pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
-        if os.path.exists(pointer_path) and not force_download:
-            return pointer_path
+        if os.path.exists(pointer_path):
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=revision,
+                    file_size=os.path.getsize(pointer_path),
+                    filename=filename,
+                    is_cached=True,
+                    local_path=pointer_path,
+                    will_download=force_download,
+                )
+            if not force_download:
+                return pointer_path
 
     # Try to get metadata (etag, commit_hash, url, size) from the server.
     # If we can't, a HEAD request error is returned.
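Note the ordering in the hunk above: when `revision` is already a commit hash and the pointer file exists, the dry run is answered entirely from disk, before any metadata (HEAD) request. A sketch with a placeholder 40-character hash:

```python
from huggingface_hub import hf_hub_download

# Placeholder 40-hex-character revision: with a warm cache, the dry run
# returns from the pointer path alone, with no network traffic at all.
info = hf_hub_download(
    "julien-c/EsperBERTo-small",
    "pytorch_model.bin",
    revision="0123456789abcdef0123456789abcdef01234567",
    dry_run=True,
)
print(info.is_cached)      # True when the shortcut was taken
print(info.will_download)  # mirrors force_download in that case
```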
@@ -1034,8 +1147,18 @@
         # Return pointer file if exists
         if commit_hash is not None:
             pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
-            if os.path.exists(pointer_path) and not force_download:
-                return pointer_path
+            if os.path.exists(pointer_path):
+                if dry_run:
+                    return DryRunFileInfo(
+                        commit_hash=commit_hash,
+                        file_size=os.path.getsize(pointer_path),
+                        filename=filename,
+                        is_cached=True,
+                        local_path=pointer_path,
+                        will_download=force_download,
+                    )
+                if not force_download:
+                    return pointer_path
 
         # Otherwise, raise appropriate error
         _raise_on_head_call_error(head_call_error, force_download, local_files_only)
@@ -1048,6 +1171,17 @@
     blob_path = os.path.join(storage_folder, "blobs", etag)
     pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
 
+    if dry_run:
+        is_cached = os.path.exists(pointer_path) or os.path.exists(blob_path)
+        return DryRunFileInfo(
+            commit_hash=commit_hash,
+            file_size=expected_size,
+            filename=filename,
+            is_cached=is_cached,
+            local_path=pointer_path,
+            will_download=force_download or not is_cached,
+        )
+
     os.makedirs(os.path.dirname(blob_path), exist_ok=True)
     os.makedirs(os.path.dirname(pointer_path), exist_ok=True)
 
@@ -1127,7 +1261,8 @@ def _hf_hub_download_to_local_dir(
     cache_dir: str,
     force_download: bool,
     local_files_only: bool,
-) -> str:
+    dry_run: bool,
+) -> Union[str, DryRunFileInfo]:
     """Download a given file to a local folder, if not already present.
 
     Method should not be called directly. Please use `hf_hub_download` instead.
@@ -1142,13 +1277,23 @@
 
     # Local file exists + metadata exists + commit_hash matches => return file
     if (
-        not force_download
-        and REGEX_COMMIT_HASH.match(revision)
+        REGEX_COMMIT_HASH.match(revision)
         and paths.file_path.is_file()
         and local_metadata is not None
        and local_metadata.commit_hash == revision
     ):
-        return str(paths.file_path)
+        local_file = str(paths.file_path)
+        if dry_run:
+            return DryRunFileInfo(
+                commit_hash=revision,
+                file_size=os.path.getsize(local_file),
+                filename=filename,
+                is_cached=True,
+                local_path=local_file,
+                will_download=force_download,
+            )
+        if not force_download:
+            return local_file
 
     # Local file doesn't exist or commit_hash doesn't match => we need the etag
     (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error(
@@ -1165,11 +1310,24 @@
 
     if head_call_error is not None:
         # No HEAD call but local file exists => default to local file
-        if not force_download and paths.file_path.is_file():
-            logger.warning(
-                f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
-            )
-            return str(paths.file_path)
+        if paths.file_path.is_file():
+            if dry_run or not force_download:
+                logger.warning(
+                    f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
+                )
+            local_path = str(paths.file_path)
+            if dry_run and local_metadata is not None:
+                return DryRunFileInfo(
+                    commit_hash=local_metadata.commit_hash,
+                    file_size=os.path.getsize(local_path),
+                    filename=filename,
+                    is_cached=True,
+                    local_path=local_path,
+                    will_download=force_download,
+                )
+            if not force_download:
+                return local_path
+
         # Otherwise => raise
         _raise_on_head_call_error(head_call_error, force_download, local_files_only)
 
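This branch keeps dry runs usable offline: if the HEAD call fails but the file and its sidecar metadata already exist in `local_dir`, the report is built from local state alone. A sketch, assuming a folder populated by an earlier download with its metadata intact (the path is hypothetical; without that metadata this branch falls back to returning the plain path):

```python
from huggingface_hub import hf_hub_download

# Offline probe: the HEAD call fails, so the report is built from the
# metadata written next to the file by a previous download.
info = hf_hub_download(
    "julien-c/EsperBERTo-small",
    "pytorch_model.bin",
    local_dir="./esperberto",  # hypothetical folder from an earlier download
    local_files_only=True,
    dry_run=True,
)
print(info.commit_hash)    # last commit recorded in the local metadata
print(info.will_download)  # False unless force_download=True
```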
@@ -1184,6 +1342,15 @@
     # etag matches => update metadata and return file
     if local_metadata is not None and local_metadata.etag == etag:
         write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+        if dry_run:
+            return DryRunFileInfo(
+                commit_hash=commit_hash,
+                file_size=expected_size,
+                filename=filename,
+                is_cached=True,
+                local_path=str(paths.file_path),
+                will_download=False,
+            )
         return str(paths.file_path)
 
     # metadata is outdated + etag is a sha256
@@ -1195,6 +1362,15 @@
             file_hash = sha_fileobj(f).hex()
         if file_hash == etag:
             write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=commit_hash,
+                    file_size=expected_size,
+                    filename=filename,
+                    is_cached=True,
+                    local_path=str(paths.file_path),
+                    will_download=False,
+                )
             return str(paths.file_path)
 
     # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
@@ -1213,8 +1389,28 @@
                 paths.file_path.parent.mkdir(parents=True, exist_ok=True)
                 shutil.copyfile(cached_path, paths.file_path)
             write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=commit_hash,
+                    file_size=expected_size,
+                    filename=filename,
+                    is_cached=True,
+                    local_path=str(paths.file_path),
+                    will_download=False,
+                )
             return str(paths.file_path)
 
+    if dry_run:
+        is_cached = paths.file_path.is_file()
+        return DryRunFileInfo(
+            commit_hash=commit_hash,
+            file_size=expected_size,
+            filename=filename,
+            is_cached=is_cached,
+            local_path=str(paths.file_path),
+            will_download=force_download or not is_cached,
+        )
+
     # Otherwise, let's download the file!
     with WeakFileLock(paths.lock_path):
         paths.file_path.unlink(missing_ok=True)  # delete outdated file first
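Putting the local_dir paths together: on a cold target folder the fallthrough above predicts the transfer, and a subsequent real call lands on exactly the path the dry run reported. A sketch with a hypothetical target folder:

```python
from huggingface_hub import hf_hub_download

target = "./esperberto"  # hypothetical, initially empty target folder

# Dry run first: the fallthrough above reports the pending transfer...
info = hf_hub_download("julien-c/EsperBERTo-small", "pytorch_model.bin", local_dir=target, dry_run=True)
assert not info.is_cached and info.will_download

# ...and the real call then lands on exactly the path the dry run predicted.
path = hf_hub_download("julien-c/EsperBERTo-small", "pytorch_model.bin", local_dir=target)
assert path == info.local_path
```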