huggingface-hub 0.24.6__py3-none-any.whl → 0.25.0rc0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (52)
  1. huggingface_hub/__init__.py +21 -1
  2. huggingface_hub/_commit_api.py +4 -4
  3. huggingface_hub/_inference_endpoints.py +13 -1
  4. huggingface_hub/_local_folder.py +191 -4
  5. huggingface_hub/_login.py +6 -6
  6. huggingface_hub/_snapshot_download.py +8 -17
  7. huggingface_hub/_space_api.py +5 -0
  8. huggingface_hub/_tensorboard_logger.py +29 -13
  9. huggingface_hub/_upload_large_folder.py +573 -0
  10. huggingface_hub/_webhooks_server.py +1 -1
  11. huggingface_hub/commands/_cli_utils.py +5 -0
  12. huggingface_hub/commands/download.py +8 -0
  13. huggingface_hub/commands/huggingface_cli.py +6 -1
  14. huggingface_hub/commands/lfs.py +2 -1
  15. huggingface_hub/commands/repo_files.py +2 -2
  16. huggingface_hub/commands/scan_cache.py +99 -57
  17. huggingface_hub/commands/tag.py +1 -1
  18. huggingface_hub/commands/upload.py +2 -1
  19. huggingface_hub/commands/upload_large_folder.py +129 -0
  20. huggingface_hub/commands/version.py +37 -0
  21. huggingface_hub/community.py +2 -2
  22. huggingface_hub/errors.py +218 -1
  23. huggingface_hub/fastai_utils.py +2 -3
  24. huggingface_hub/file_download.py +63 -63
  25. huggingface_hub/hf_api.py +758 -314
  26. huggingface_hub/hf_file_system.py +15 -23
  27. huggingface_hub/hub_mixin.py +27 -25
  28. huggingface_hub/inference/_client.py +78 -127
  29. huggingface_hub/inference/_generated/_async_client.py +169 -144
  30. huggingface_hub/inference/_generated/types/base.py +0 -9
  31. huggingface_hub/inference/_templating.py +2 -3
  32. huggingface_hub/inference_api.py +2 -2
  33. huggingface_hub/keras_mixin.py +2 -2
  34. huggingface_hub/lfs.py +7 -98
  35. huggingface_hub/repocard.py +6 -5
  36. huggingface_hub/repository.py +5 -5
  37. huggingface_hub/serialization/_torch.py +64 -11
  38. huggingface_hub/utils/__init__.py +13 -14
  39. huggingface_hub/utils/_cache_manager.py +97 -14
  40. huggingface_hub/utils/_fixes.py +18 -2
  41. huggingface_hub/utils/_http.py +228 -2
  42. huggingface_hub/utils/_lfs.py +110 -0
  43. huggingface_hub/utils/_runtime.py +7 -1
  44. huggingface_hub/utils/_token.py +3 -2
  45. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/METADATA +2 -2
  46. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/RECORD +50 -48
  47. huggingface_hub/inference/_types.py +0 -52
  48. huggingface_hub/utils/_errors.py +0 -397
  49. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/LICENSE +0 -0
  50. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/WHEEL +0 -0
  51. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/entry_points.txt +0 -0
  52. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/top_level.txt +0 -0
@@ -18,41 +18,29 @@ from urllib.parse import quote, urlparse
18
18
 
19
19
  import requests
20
20
 
21
- from . import __version__ # noqa: F401 # for backward compatibility
21
+ from . import (
22
+ __version__, # noqa: F401 # for backward compatibility
23
+ constants,
24
+ )
22
25
  from ._local_folder import (
23
26
  get_local_download_paths,
24
27
  read_download_metadata,
25
28
  write_download_metadata,
26
29
  )
27
30
  from .constants import (
28
- DEFAULT_ETAG_TIMEOUT,
29
- DEFAULT_REQUEST_TIMEOUT,
30
- DEFAULT_REVISION,
31
- DOWNLOAD_CHUNK_SIZE,
32
- ENDPOINT,
33
- HF_HUB_CACHE,
34
- HF_HUB_DISABLE_SYMLINKS_WARNING,
35
- HF_HUB_DOWNLOAD_TIMEOUT,
36
- HF_HUB_ENABLE_HF_TRANSFER,
37
- HF_HUB_ETAG_TIMEOUT,
38
- HF_TRANSFER_CONCURRENCY,
39
- HUGGINGFACE_CO_URL_TEMPLATE,
40
- HUGGINGFACE_HEADER_X_LINKED_ETAG,
41
- HUGGINGFACE_HEADER_X_LINKED_SIZE,
42
- HUGGINGFACE_HEADER_X_REPO_COMMIT,
31
+ HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 # for backward compatibility
43
32
  HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility
44
- REPO_ID_SEPARATOR,
45
- REPO_TYPES,
46
- REPO_TYPES_URL_PREFIXES,
47
33
  )
48
- from .utils import (
34
+ from .errors import (
49
35
  EntryNotFoundError,
50
36
  FileMetadataError,
51
37
  GatedRepoError,
52
38
  LocalEntryNotFoundError,
53
- OfflineModeIsEnabled,
54
39
  RepositoryNotFoundError,
55
40
  RevisionNotFoundError,
41
+ )
42
+ from .utils import (
43
+ OfflineModeIsEnabled,
56
44
  SoftTemporaryDirectory,
57
45
  WeakFileLock,
58
46
  build_hf_headers,
@@ -116,7 +104,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
116
104
  """
117
105
  # Defaults to HF cache
118
106
  if cache_dir is None:
119
- cache_dir = HF_HUB_CACHE
107
+ cache_dir = constants.HF_HUB_CACHE
120
108
  cache_dir = str(Path(cache_dir).expanduser().resolve()) # make it unique
121
109
 
122
110
  # Check symlink compatibility only once (per cache directory) at first time use
@@ -137,7 +125,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
137
125
  # Likely running on Windows
138
126
  _are_symlinks_supported_in_dir[cache_dir] = False
139
127
 
140
- if not HF_HUB_DISABLE_SYMLINKS_WARNING:
128
+ if not constants.HF_HUB_DISABLE_SYMLINKS_WARNING:
141
129
  message = (
142
130
  "`huggingface_hub` cache-system uses symlinks by default to"
143
131
  " efficiently store duplicated files but your machine does not"
@@ -152,7 +140,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
152
140
  message += (
153
141
  "\nTo support symlinks on Windows, you either need to"
154
142
  " activate Developer Mode or to run Python as an"
155
- " administrator. In order to see activate developer mode,"
143
+ " administrator. In order to activate developer mode,"
156
144
  " see this article:"
157
145
  " https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development"
158
146
  )
@@ -257,20 +245,20 @@ def hf_hub_url(
257
245
  if subfolder is not None:
258
246
  filename = f"{subfolder}/{filename}"
259
247
 
260
- if repo_type not in REPO_TYPES:
248
+ if repo_type not in constants.REPO_TYPES:
261
249
  raise ValueError("Invalid repo type")
262
250
 
263
- if repo_type in REPO_TYPES_URL_PREFIXES:
264
- repo_id = REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
251
+ if repo_type in constants.REPO_TYPES_URL_PREFIXES:
252
+ repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
265
253
 
266
254
  if revision is None:
267
- revision = DEFAULT_REVISION
255
+ revision = constants.DEFAULT_REVISION
268
256
  url = HUGGINGFACE_CO_URL_TEMPLATE.format(
269
257
  repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename)
270
258
  )
271
259
  # Update endpoint if provided
272
- if endpoint is not None and url.startswith(ENDPOINT):
273
- url = endpoint + url[len(ENDPOINT) :]
260
+ if endpoint is not None and url.startswith(constants.ENDPOINT):
261
+ url = endpoint + url[len(constants.ENDPOINT) :]
274
262
  return url
275
263
 
276
264
 
@@ -333,7 +321,7 @@ def filename_to_url(
333
321
  )
334
322
 
335
323
  if cache_dir is None:
336
- cache_dir = HF_HUB_CACHE
324
+ cache_dir = constants.HF_HUB_CACHE
337
325
  if isinstance(cache_dir, Path):
338
326
  cache_dir = str(cache_dir)
339
327
 
@@ -439,8 +427,12 @@ def http_get(
439
427
  The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If
440
428
  not set, the filename is guessed from the URL or the `Content-Disposition` header.
441
429
  """
430
+ if expected_size is not None and resume_size == expected_size:
431
+ # If the file is already fully downloaded, we don't need to download it again.
432
+ return
433
+
442
434
  hf_transfer = None
443
- if HF_HUB_ENABLE_HF_TRANSFER:
435
+ if constants.HF_HUB_ENABLE_HF_TRANSFER:
444
436
  if resume_size != 0:
445
437
  warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
446
438
  elif proxies is not None:
@@ -461,7 +453,7 @@ def http_get(
461
453
  headers["Range"] = "bytes=%d-" % (resume_size,)
462
454
 
463
455
  r = _request_wrapper(
464
- method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=HF_HUB_DOWNLOAD_TIMEOUT
456
+ method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
465
457
  )
466
458
  hf_raise_for_status(r)
467
459
  content_length = r.headers.get("Content-Length")
@@ -511,7 +503,7 @@ def http_get(
511
503
  )
512
504
 
513
505
  with progress_cm as progress:
514
- if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
506
+ if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
515
507
  supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
516
508
  if not supports_callback:
517
509
  warnings.warn(
@@ -523,8 +515,8 @@ def http_get(
523
515
  hf_transfer.download(
524
516
  url=url,
525
517
  filename=temp_file.name,
526
- max_files=HF_TRANSFER_CONCURRENCY,
527
- chunk_size=DOWNLOAD_CHUNK_SIZE,
518
+ max_files=constants.HF_TRANSFER_CONCURRENCY,
519
+ chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
528
520
  headers=headers,
529
521
  parallel_failures=3,
530
522
  max_retries=5,
@@ -546,7 +538,7 @@ def http_get(
546
538
  return
547
539
  new_resume_size = resume_size
548
540
  try:
549
- for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
541
+ for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
550
542
  if chunk: # filter out keep-alive new chunks
551
543
  progress.update(len(chunk))
552
544
  temp_file.write(chunk)
@@ -594,7 +586,7 @@ def cached_download(
594
586
  force_download: bool = False,
595
587
  force_filename: Optional[str] = None,
596
588
  proxies: Optional[Dict] = None,
597
- etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
589
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
598
590
  resume_download: Optional[bool] = None,
599
591
  token: Union[bool, str, None] = None,
600
592
  local_files_only: bool = False,
@@ -672,9 +664,9 @@ def cached_download(
672
664
 
673
665
  </Tip>
674
666
  """
675
- if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
667
+ if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT:
676
668
  # Respect environment variable above user value
677
- etag_timeout = HF_HUB_ETAG_TIMEOUT
669
+ etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
678
670
 
679
671
  if not legacy_cache_layout:
680
672
  warnings.warn(
@@ -691,7 +683,7 @@ def cached_download(
691
683
  )
692
684
 
693
685
  if cache_dir is None:
694
- cache_dir = HF_HUB_CACHE
686
+ cache_dir = constants.HF_HUB_CACHE
695
687
  if isinstance(cache_dir, Path):
696
688
  cache_dir = str(cache_dir)
697
689
 
@@ -723,7 +715,7 @@ def cached_download(
723
715
  )
724
716
  headers.pop("Accept-Encoding", None)
725
717
  hf_raise_for_status(r)
726
- etag = r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")
718
+ etag = r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")
727
719
  # We favor a custom header indicating the etag of the linked resource, and
728
720
  # we fallback to the regular etag header.
729
721
  # If we don't have any of those, raise an error.
@@ -970,7 +962,7 @@ def repo_folder_name(*, repo_id: str, repo_type: str) -> str:
970
962
  """
971
963
  # remove all `/` occurrences to correctly convert repo to directory name
972
964
  parts = [f"{repo_type}s", *repo_id.split("/")]
973
- return REPO_ID_SEPARATOR.join(parts)
965
+ return constants.REPO_ID_SEPARATOR.join(parts)
974
966
 
975
967
 
976
968
  def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
@@ -1021,7 +1013,7 @@ def hf_hub_download(
1021
1013
  user_agent: Union[Dict, str, None] = None,
1022
1014
  force_download: bool = False,
1023
1015
  proxies: Optional[Dict] = None,
1024
- etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
1016
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
1025
1017
  token: Union[bool, str, None] = None,
1026
1018
  local_files_only: bool = False,
1027
1019
  headers: Optional[Dict[str, str]] = None,
@@ -1135,9 +1127,9 @@ def hf_hub_download(
1135
1127
  If some parameter value is invalid.
1136
1128
 
1137
1129
  """
1138
- if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
1130
+ if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT:
1139
1131
  # Respect environment variable above user value
1140
- etag_timeout = HF_HUB_ETAG_TIMEOUT
1132
+ etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
1141
1133
 
1142
1134
  if force_filename is not None:
1143
1135
  warnings.warn(
@@ -1180,9 +1172,9 @@ def hf_hub_download(
1180
1172
  )
1181
1173
 
1182
1174
  if cache_dir is None:
1183
- cache_dir = HF_HUB_CACHE
1175
+ cache_dir = constants.HF_HUB_CACHE
1184
1176
  if revision is None:
1185
- revision = DEFAULT_REVISION
1177
+ revision = constants.DEFAULT_REVISION
1186
1178
  if isinstance(cache_dir, Path):
1187
1179
  cache_dir = str(cache_dir)
1188
1180
  if isinstance(local_dir, Path):
@@ -1196,8 +1188,8 @@ def hf_hub_download(
1196
1188
 
1197
1189
  if repo_type is None:
1198
1190
  repo_type = "model"
1199
- if repo_type not in REPO_TYPES:
1200
- raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
1191
+ if repo_type not in constants.REPO_TYPES:
1192
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
1201
1193
 
1202
1194
  headers = build_hf_headers(
1203
1195
  token=token,
@@ -1396,7 +1388,8 @@ def _hf_hub_download_to_cache_dir(
1396
1388
  filename=filename,
1397
1389
  force_download=force_download,
1398
1390
  )
1399
- _create_symlink(blob_path, pointer_path, new_blob=True)
1391
+ if not os.path.exists(pointer_path):
1392
+ _create_symlink(blob_path, pointer_path, new_blob=True)
1400
1393
 
1401
1394
  return pointer_path
1402
1395
 
@@ -1581,10 +1574,10 @@ def try_to_load_from_cache(
1581
1574
  revision = "main"
1582
1575
  if repo_type is None:
1583
1576
  repo_type = "model"
1584
- if repo_type not in REPO_TYPES:
1585
- raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
1577
+ if repo_type not in constants.REPO_TYPES:
1578
+ raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
1586
1579
  if cache_dir is None:
1587
- cache_dir = HF_HUB_CACHE
1580
+ cache_dir = constants.HF_HUB_CACHE
1588
1581
 
1589
1582
  object_id = repo_id.replace("/", "--")
1590
1583
  repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}")
@@ -1625,7 +1618,7 @@ def get_hf_file_metadata(
1625
1618
  url: str,
1626
1619
  token: Union[bool, str, None] = None,
1627
1620
  proxies: Optional[Dict] = None,
1628
- timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT,
1621
+ timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
1629
1622
  library_name: Optional[str] = None,
1630
1623
  library_version: Optional[str] = None,
1631
1624
  user_agent: Union[Dict, str, None] = None,
@@ -1683,15 +1676,17 @@ def get_hf_file_metadata(
1683
1676
 
1684
1677
  # Return
1685
1678
  return HfFileMetadata(
1686
- commit_hash=r.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT),
1679
+ commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
1687
1680
  # We favor a custom header indicating the etag of the linked resource, and
1688
1681
  # we fallback to the regular etag header.
1689
- etag=_normalize_etag(r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
1682
+ etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
1690
1683
  # Either from response headers (if redirected) or defaults to request url
1691
1684
  # Do not use directly `url`, as `_request_wrapper` might have followed relative
1692
1685
  # redirects.
1693
1686
  location=r.headers.get("Location") or r.request.url, # type: ignore
1694
- size=_int_or_none(r.headers.get(HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")),
1687
+ size=_int_or_none(
1688
+ r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
1689
+ ),
1695
1690
  )
1696
1691
 
1697
1692
 
@@ -1736,7 +1731,7 @@ def _get_metadata_or_catch_error(
1736
1731
  ),
1737
1732
  )
1738
1733
 
1739
- url = url = hf_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint)
1734
+ url = hf_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint)
1740
1735
  url_to_download: str = url
1741
1736
  etag: Optional[str] = None
1742
1737
  commit_hash: Optional[str] = None
@@ -1754,11 +1749,16 @@ def _get_metadata_or_catch_error(
1754
1749
  except EntryNotFoundError as http_error:
1755
1750
  if storage_folder is not None and relative_filename is not None:
1756
1751
  # Cache the non-existence of the file
1757
- commit_hash = http_error.response.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT)
1752
+ commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
1758
1753
  if commit_hash is not None:
1759
1754
  no_exist_file_path = Path(storage_folder) / ".no_exist" / commit_hash / relative_filename
1760
1755
  no_exist_file_path.parent.mkdir(parents=True, exist_ok=True)
1761
- no_exist_file_path.touch()
1756
+ try:
1757
+ no_exist_file_path.touch()
1758
+ except OSError as e:
1759
+ logger.error(
1760
+ f"Could not cache non-existence of file. Will ignore error and continue. Error: {e}"
1761
+ )
1762
1762
  _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
1763
1763
  raise
1764
1764
 
@@ -1888,14 +1888,14 @@ def _download_to_tmp_and_move(
1888
1888
  # Do nothing if already exists (except if force_download=True)
1889
1889
  return
1890
1890
 
1891
- if incomplete_path.exists() and (force_download or (HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
1891
+ if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
1892
1892
  # By default, we will try to resume the download if possible.
1893
1893
  # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
1894
1894
  # not resume the download => delete the incomplete file.
1895
1895
  message = f"Removing incomplete file '{incomplete_path}'"
1896
1896
  if force_download:
1897
1897
  message += " (force_download=True)"
1898
- elif HF_HUB_ENABLE_HF_TRANSFER and not proxies:
1898
+ elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies:
1899
1899
  message += " (hf_transfer=True)"
1900
1900
  logger.info(message)
1901
1901
  incomplete_path.unlink(missing_ok=True)