huggingface_hub-0.24.7-py3-none-any.whl → huggingface_hub-0.25.0rc0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (52):
  1. huggingface_hub/__init__.py +21 -1
  2. huggingface_hub/_commit_api.py +4 -4
  3. huggingface_hub/_inference_endpoints.py +13 -1
  4. huggingface_hub/_local_folder.py +191 -4
  5. huggingface_hub/_login.py +6 -6
  6. huggingface_hub/_snapshot_download.py +8 -17
  7. huggingface_hub/_space_api.py +5 -0
  8. huggingface_hub/_tensorboard_logger.py +29 -13
  9. huggingface_hub/_upload_large_folder.py +573 -0
  10. huggingface_hub/_webhooks_server.py +1 -1
  11. huggingface_hub/commands/_cli_utils.py +5 -0
  12. huggingface_hub/commands/download.py +8 -0
  13. huggingface_hub/commands/huggingface_cli.py +6 -1
  14. huggingface_hub/commands/lfs.py +2 -1
  15. huggingface_hub/commands/repo_files.py +2 -2
  16. huggingface_hub/commands/scan_cache.py +99 -57
  17. huggingface_hub/commands/tag.py +1 -1
  18. huggingface_hub/commands/upload.py +2 -1
  19. huggingface_hub/commands/upload_large_folder.py +129 -0
  20. huggingface_hub/commands/version.py +37 -0
  21. huggingface_hub/community.py +2 -2
  22. huggingface_hub/errors.py +218 -1
  23. huggingface_hub/fastai_utils.py +2 -3
  24. huggingface_hub/file_download.py +61 -62
  25. huggingface_hub/hf_api.py +758 -314
  26. huggingface_hub/hf_file_system.py +15 -23
  27. huggingface_hub/hub_mixin.py +27 -25
  28. huggingface_hub/inference/_client.py +78 -127
  29. huggingface_hub/inference/_generated/_async_client.py +169 -144
  30. huggingface_hub/inference/_generated/types/base.py +0 -9
  31. huggingface_hub/inference/_templating.py +2 -3
  32. huggingface_hub/inference_api.py +2 -2
  33. huggingface_hub/keras_mixin.py +2 -2
  34. huggingface_hub/lfs.py +7 -98
  35. huggingface_hub/repocard.py +6 -5
  36. huggingface_hub/repository.py +5 -5
  37. huggingface_hub/serialization/_torch.py +64 -11
  38. huggingface_hub/utils/__init__.py +13 -14
  39. huggingface_hub/utils/_cache_manager.py +97 -14
  40. huggingface_hub/utils/_fixes.py +18 -2
  41. huggingface_hub/utils/_http.py +228 -2
  42. huggingface_hub/utils/_lfs.py +110 -0
  43. huggingface_hub/utils/_runtime.py +7 -1
  44. huggingface_hub/utils/_token.py +3 -2
  45. {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/METADATA +2 -2
  46. {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/RECORD +50 -48
  47. huggingface_hub/inference/_types.py +0 -52
  48. huggingface_hub/utils/_errors.py +0 -397
  49. {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/LICENSE +0 -0
  50. {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/WHEEL +0 -0
  51. {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/entry_points.txt +0 -0
  52. {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/top_level.txt +0 -0
@@ -18,41 +18,29 @@ from urllib.parse import quote, urlparse
18
18
 
19
19
  import requests
20
20
 
21
- from . import __version__ # noqa: F401 # for backward compatibility
21
+ from . import (
22
+ __version__, # noqa: F401 # for backward compatibility
23
+ constants,
24
+ )
22
25
  from ._local_folder import (
23
26
  get_local_download_paths,
24
27
  read_download_metadata,
25
28
  write_download_metadata,
26
29
  )
27
30
  from .constants import (
28
- DEFAULT_ETAG_TIMEOUT,
29
- DEFAULT_REQUEST_TIMEOUT,
30
- DEFAULT_REVISION,
31
- DOWNLOAD_CHUNK_SIZE,
32
- ENDPOINT,
33
- HF_HUB_CACHE,
34
- HF_HUB_DISABLE_SYMLINKS_WARNING,
35
- HF_HUB_DOWNLOAD_TIMEOUT,
36
- HF_HUB_ENABLE_HF_TRANSFER,
37
- HF_HUB_ETAG_TIMEOUT,
38
- HF_TRANSFER_CONCURRENCY,
39
- HUGGINGFACE_CO_URL_TEMPLATE,
40
- HUGGINGFACE_HEADER_X_LINKED_ETAG,
41
- HUGGINGFACE_HEADER_X_LINKED_SIZE,
42
- HUGGINGFACE_HEADER_X_REPO_COMMIT,
31
+ HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 # for backward compatibility
43
32
  HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility
44
- REPO_ID_SEPARATOR,
45
- REPO_TYPES,
46
- REPO_TYPES_URL_PREFIXES,
47
33
  )
48
- from .utils import (
34
+ from .errors import (
49
35
  EntryNotFoundError,
50
36
  FileMetadataError,
51
37
  GatedRepoError,
52
38
  LocalEntryNotFoundError,
53
- OfflineModeIsEnabled,
54
39
  RepositoryNotFoundError,
55
40
  RevisionNotFoundError,
41
+ )
42
+ from .utils import (
43
+ OfflineModeIsEnabled,
56
44
  SoftTemporaryDirectory,
57
45
  WeakFileLock,
58
46
  build_hf_headers,
@@ -116,7 +104,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
116
104
  """
117
105
  # Defaults to HF cache
118
106
  if cache_dir is None:
119
- cache_dir = HF_HUB_CACHE
107
+ cache_dir = constants.HF_HUB_CACHE
120
108
  cache_dir = str(Path(cache_dir).expanduser().resolve()) # make it unique
121
109
 
122
110
  # Check symlink compatibility only once (per cache directory) at first time use
@@ -137,7 +125,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
137
125
  # Likely running on Windows
138
126
  _are_symlinks_supported_in_dir[cache_dir] = False
139
127
 
140
- if not HF_HUB_DISABLE_SYMLINKS_WARNING:
128
+ if not constants.HF_HUB_DISABLE_SYMLINKS_WARNING:
141
129
  message = (
142
130
  "`huggingface_hub` cache-system uses symlinks by default to"
143
131
  " efficiently store duplicated files but your machine does not"
@@ -152,7 +140,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
152
140
  message += (
153
141
  "\nTo support symlinks on Windows, you either need to"
154
142
  " activate Developer Mode or to run Python as an"
155
- " administrator. In order to see activate developer mode,"
143
+ " administrator. In order to activate developer mode,"
156
144
  " see this article:"
157
145
  " https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development"
158
146
  )
@@ -257,20 +245,20 @@ def hf_hub_url(
257
245
  if subfolder is not None:
258
246
  filename = f"{subfolder}/{filename}"
259
247
 
260
- if repo_type not in REPO_TYPES:
248
+ if repo_type not in constants.REPO_TYPES:
261
249
  raise ValueError("Invalid repo type")
262
250
 
263
- if repo_type in REPO_TYPES_URL_PREFIXES:
264
- repo_id = REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
251
+ if repo_type in constants.REPO_TYPES_URL_PREFIXES:
252
+ repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
265
253
 
266
254
  if revision is None:
267
- revision = DEFAULT_REVISION
255
+ revision = constants.DEFAULT_REVISION
268
256
  url = HUGGINGFACE_CO_URL_TEMPLATE.format(
269
257
  repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename)
270
258
  )
271
259
  # Update endpoint if provided
272
- if endpoint is not None and url.startswith(ENDPOINT):
273
- url = endpoint + url[len(ENDPOINT) :]
260
+ if endpoint is not None and url.startswith(constants.ENDPOINT):
261
+ url = endpoint + url[len(constants.ENDPOINT) :]
274
262
  return url
275
263
 
276
264
 
@@ -333,7 +321,7 @@ def filename_to_url(
333
321
  )
334
322
 
335
323
  if cache_dir is None:
336
- cache_dir = HF_HUB_CACHE
324
+ cache_dir = constants.HF_HUB_CACHE
337
325
  if isinstance(cache_dir, Path):
338
326
  cache_dir = str(cache_dir)
339
327
 
@@ -439,8 +427,12 @@ def http_get(
439
427
  The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If
440
428
  not set, the filename is guessed from the URL or the `Content-Disposition` header.
441
429
  """
430
+ if expected_size is not None and resume_size == expected_size:
431
+ # If the file is already fully downloaded, we don't need to download it again.
432
+ return
433
+
442
434
  hf_transfer = None
443
- if HF_HUB_ENABLE_HF_TRANSFER:
435
+ if constants.HF_HUB_ENABLE_HF_TRANSFER:
444
436
  if resume_size != 0:
445
437
  warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
446
438
  elif proxies is not None:
@@ -461,7 +453,7 @@ def http_get(
461
453
  headers["Range"] = "bytes=%d-" % (resume_size,)
462
454
 
463
455
  r = _request_wrapper(
464
- method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=HF_HUB_DOWNLOAD_TIMEOUT
456
+ method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
465
457
  )
466
458
  hf_raise_for_status(r)
467
459
  content_length = r.headers.get("Content-Length")
@@ -511,7 +503,7 @@ def http_get(
511
503
  )
512
504
 
513
505
  with progress_cm as progress:
514
- if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
506
+ if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
515
507
  supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
516
508
  if not supports_callback:
517
509
  warnings.warn(
@@ -523,8 +515,8 @@ def http_get(
523
515
  hf_transfer.download(
524
516
  url=url,
525
517
  filename=temp_file.name,
526
- max_files=HF_TRANSFER_CONCURRENCY,
527
- chunk_size=DOWNLOAD_CHUNK_SIZE,
518
+ max_files=constants.HF_TRANSFER_CONCURRENCY,
519
+ chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
528
520
  headers=headers,
529
521
  parallel_failures=3,
530
522
  max_retries=5,
@@ -546,7 +538,7 @@ def http_get(
546
538
  return
547
539
  new_resume_size = resume_size
548
540
  try:
549
- for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
541
+ for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
550
542
  if chunk: # filter out keep-alive new chunks
551
543
  progress.update(len(chunk))
552
544
  temp_file.write(chunk)
@@ -594,7 +586,7 @@ def cached_download(
594
586
  force_download: bool = False,
595
587
  force_filename: Optional[str] = None,
596
588
  proxies: Optional[Dict] = None,
597
- etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
589
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
598
590
  resume_download: Optional[bool] = None,
599
591
  token: Union[bool, str, None] = None,
600
592
  local_files_only: bool = False,
@@ -672,9 +664,9 @@ def cached_download(
672
664
 
673
665
  </Tip>
674
666
  """
675
- if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
667
+ if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT:
676
668
  # Respect environment variable above user value
677
- etag_timeout = HF_HUB_ETAG_TIMEOUT
669
+ etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
678
670
 
679
671
  if not legacy_cache_layout:
680
672
  warnings.warn(
@@ -691,7 +683,7 @@ def cached_download(
691
683
  )
692
684
 
693
685
  if cache_dir is None:
694
- cache_dir = HF_HUB_CACHE
686
+ cache_dir = constants.HF_HUB_CACHE
695
687
  if isinstance(cache_dir, Path):
696
688
  cache_dir = str(cache_dir)
697
689
 
@@ -723,7 +715,7 @@ def cached_download(
723
715
  )
724
716
  headers.pop("Accept-Encoding", None)
725
717
  hf_raise_for_status(r)
726
- etag = r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")
718
+ etag = r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")
727
719
  # We favor a custom header indicating the etag of the linked resource, and
728
720
  # we fallback to the regular etag header.
729
721
  # If we don't have any of those, raise an error.
@@ -970,7 +962,7 @@ def repo_folder_name(*, repo_id: str, repo_type: str) -> str:
970
962
  """
971
963
  # remove all `/` occurrences to correctly convert repo to directory name
972
964
  parts = [f"{repo_type}s", *repo_id.split("/")]
973
- return REPO_ID_SEPARATOR.join(parts)
965
+ return constants.REPO_ID_SEPARATOR.join(parts)
974
966
 
975
967
 
976
968
  def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
@@ -1021,7 +1013,7 @@ def hf_hub_download(
1021
1013
  user_agent: Union[Dict, str, None] = None,
1022
1014
  force_download: bool = False,
1023
1015
  proxies: Optional[Dict] = None,
1024
- etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
1016
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
1025
1017
  token: Union[bool, str, None] = None,
1026
1018
  local_files_only: bool = False,
1027
1019
  headers: Optional[Dict[str, str]] = None,
@@ -1135,9 +1127,9 @@ def hf_hub_download(
1135
1127
  If some parameter value is invalid.
1136
1128
 
1137
1129
  """
1138
- if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
1130
+ if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT:
1139
1131
  # Respect environment variable above user value
1140
- etag_timeout = HF_HUB_ETAG_TIMEOUT
1132
+ etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
1141
1133
 
1142
1134
  if force_filename is not None:
1143
1135
  warnings.warn(
@@ -1180,9 +1172,9 @@ def hf_hub_download(
1180
1172
  )
1181
1173
 
1182
1174
  if cache_dir is None:
1183
- cache_dir = HF_HUB_CACHE
1175
+ cache_dir = constants.HF_HUB_CACHE
1184
1176
  if revision is None:
1185
- revision = DEFAULT_REVISION
1177
+ revision = constants.DEFAULT_REVISION
1186
1178
  if isinstance(cache_dir, Path):
1187
1179
  cache_dir = str(cache_dir)
1188
1180
  if isinstance(local_dir, Path):
@@ -1196,8 +1188,8 @@ def hf_hub_download(
1196
1188
 
1197
1189
  if repo_type is None:
1198
1190
  repo_type = "model"
1199
- if repo_type not in REPO_TYPES:
1200
- raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
1191
+ if repo_type not in constants.REPO_TYPES:
1192
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
1201
1193
 
1202
1194
  headers = build_hf_headers(
1203
1195
  token=token,
@@ -1582,10 +1574,10 @@ def try_to_load_from_cache(
1582
1574
  revision = "main"
1583
1575
  if repo_type is None:
1584
1576
  repo_type = "model"
1585
- if repo_type not in REPO_TYPES:
1586
- raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
1577
+ if repo_type not in constants.REPO_TYPES:
1578
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
1587
1579
  if cache_dir is None:
1588
- cache_dir = HF_HUB_CACHE
1580
+ cache_dir = constants.HF_HUB_CACHE
1589
1581
 
1590
1582
  object_id = repo_id.replace("/", "--")
1591
1583
  repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}")
@@ -1626,7 +1618,7 @@ def get_hf_file_metadata(
1626
1618
  url: str,
1627
1619
  token: Union[bool, str, None] = None,
1628
1620
  proxies: Optional[Dict] = None,
1629
- timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT,
1621
+ timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
1630
1622
  library_name: Optional[str] = None,
1631
1623
  library_version: Optional[str] = None,
1632
1624
  user_agent: Union[Dict, str, None] = None,
@@ -1684,15 +1676,17 @@ def get_hf_file_metadata(
1684
1676
 
1685
1677
  # Return
1686
1678
  return HfFileMetadata(
1687
- commit_hash=r.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT),
1679
+ commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
1688
1680
  # We favor a custom header indicating the etag of the linked resource, and
1689
1681
  # we fallback to the regular etag header.
1690
- etag=_normalize_etag(r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
1682
+ etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
1691
1683
  # Either from response headers (if redirected) or defaults to request url
1692
1684
  # Do not use directly `url`, as `_request_wrapper` might have followed relative
1693
1685
  # redirects.
1694
1686
  location=r.headers.get("Location") or r.request.url, # type: ignore
1695
- size=_int_or_none(r.headers.get(HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")),
1687
+ size=_int_or_none(
1688
+ r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
1689
+ ),
1696
1690
  )
1697
1691
 
1698
1692
 
@@ -1737,7 +1731,7 @@ def _get_metadata_or_catch_error(
1737
1731
  ),
1738
1732
  )
1739
1733
 
1740
- url = url = hf_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint)
1734
+ url = hf_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint)
1741
1735
  url_to_download: str = url
1742
1736
  etag: Optional[str] = None
1743
1737
  commit_hash: Optional[str] = None
@@ -1755,11 +1749,16 @@ def _get_metadata_or_catch_error(
1755
1749
  except EntryNotFoundError as http_error:
1756
1750
  if storage_folder is not None and relative_filename is not None:
1757
1751
  # Cache the non-existence of the file
1758
- commit_hash = http_error.response.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT)
1752
+ commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
1759
1753
  if commit_hash is not None:
1760
1754
  no_exist_file_path = Path(storage_folder) / ".no_exist" / commit_hash / relative_filename
1761
1755
  no_exist_file_path.parent.mkdir(parents=True, exist_ok=True)
1762
- no_exist_file_path.touch()
1756
+ try:
1757
+ no_exist_file_path.touch()
1758
+ except OSError as e:
1759
+ logger.error(
1760
+ f"Could not cache non-existence of file. Will ignore error and continue. Error: {e}"
1761
+ )
1763
1762
  _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
1764
1763
  raise
1765
1764
 
@@ -1889,14 +1888,14 @@ def _download_to_tmp_and_move(
1889
1888
  # Do nothing if already exists (except if force_download=True)
1890
1889
  return
1891
1890
 
1892
- if incomplete_path.exists() and (force_download or (HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
1891
+ if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
1893
1892
  # By default, we will try to resume the download if possible.
1894
1893
  # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
1895
1894
  # not resume the download => delete the incomplete file.
1896
1895
  message = f"Removing incomplete file '{incomplete_path}'"
1897
1896
  if force_download:
1898
1897
  message += " (force_download=True)"
1899
- elif HF_HUB_ENABLE_HF_TRANSFER and not proxies:
1898
+ elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies:
1900
1899
  message += " (hf_transfer=True)"
1901
1900
  logger.info(message)
1902
1901
  incomplete_path.unlink(missing_ok=True)