huggingface-hub 0.23.5__py3-none-any.whl → 0.24.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (42)
  1. huggingface_hub/__init__.py +47 -15
  2. huggingface_hub/_commit_api.py +38 -8
  3. huggingface_hub/_inference_endpoints.py +11 -4
  4. huggingface_hub/_local_folder.py +22 -13
  5. huggingface_hub/_snapshot_download.py +12 -7
  6. huggingface_hub/_webhooks_server.py +3 -1
  7. huggingface_hub/commands/huggingface_cli.py +4 -3
  8. huggingface_hub/commands/repo_files.py +128 -0
  9. huggingface_hub/constants.py +12 -0
  10. huggingface_hub/file_download.py +127 -91
  11. huggingface_hub/hf_api.py +979 -341
  12. huggingface_hub/hf_file_system.py +30 -3
  13. huggingface_hub/inference/_client.py +373 -42
  14. huggingface_hub/inference/_common.py +0 -2
  15. huggingface_hub/inference/_generated/_async_client.py +390 -48
  16. huggingface_hub/inference/_generated/types/__init__.py +4 -1
  17. huggingface_hub/inference/_generated/types/chat_completion.py +41 -21
  18. huggingface_hub/inference/_generated/types/feature_extraction.py +23 -5
  19. huggingface_hub/inference/_generated/types/text_generation.py +29 -0
  20. huggingface_hub/lfs.py +11 -6
  21. huggingface_hub/repocard_data.py +3 -3
  22. huggingface_hub/repository.py +6 -6
  23. huggingface_hub/serialization/__init__.py +8 -3
  24. huggingface_hub/serialization/_base.py +13 -16
  25. huggingface_hub/serialization/_tensorflow.py +4 -3
  26. huggingface_hub/serialization/_torch.py +399 -22
  27. huggingface_hub/utils/__init__.py +0 -1
  28. huggingface_hub/utils/_errors.py +1 -1
  29. huggingface_hub/utils/_fixes.py +14 -3
  30. huggingface_hub/utils/_paths.py +17 -6
  31. huggingface_hub/utils/_subprocess.py +0 -1
  32. huggingface_hub/utils/_telemetry.py +9 -1
  33. huggingface_hub/utils/endpoint_helpers.py +2 -186
  34. huggingface_hub/utils/sha.py +36 -1
  35. huggingface_hub/utils/tqdm.py +0 -1
  36. {huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.0rc0.dist-info}/METADATA +12 -9
  37. {huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.0rc0.dist-info}/RECORD +41 -41
  38. huggingface_hub/serialization/_numpy.py +0 -68
  39. {huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.0rc0.dist-info}/LICENSE +0 -0
  40. {huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.0rc0.dist-info}/WHEEL +0 -0
  41. {huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.0rc0.dist-info}/entry_points.txt +0 -0
  42. {huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.0rc0.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import contextlib
1
2
  import copy
2
3
  import errno
3
4
  import fnmatch
@@ -76,6 +77,7 @@ from .utils import (
76
77
  tqdm,
77
78
  validate_hf_hub_args,
78
79
  )
80
+ from .utils._deprecation import _deprecate_arguments, _deprecate_method
79
81
  from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility
80
82
  from .utils._typing import HTTP_METHOD_T
81
83
  from .utils.insecure_hashlib import sha256
@@ -272,6 +274,7 @@ def hf_hub_url(
272
274
  return url
273
275
 
274
276
 
277
+ @_deprecate_method(version="0.26", message="Use `hf_hub_download` to benefit from the new cache layout.")
275
278
  def url_to_filename(url: str, etag: Optional[str] = None) -> str:
276
279
  """Generate a local filename from a url.
277
280
 
@@ -303,6 +306,7 @@ def url_to_filename(url: str, etag: Optional[str] = None) -> str:
303
306
  return filename
304
307
 
305
308
 
309
+ @_deprecate_method(version="0.26", message="Use `hf_hub_url` instead.")
306
310
  def filename_to_url(
307
311
  filename,
308
312
  cache_dir: Optional[str] = None,
@@ -487,9 +491,8 @@ def http_get(
487
491
  )
488
492
 
489
493
  # Stream file to buffer
490
- progress = _tqdm_bar
491
- if progress is None:
492
- progress = tqdm(
494
+ progress_cm: tqdm = (
495
+ tqdm( # type: ignore[assignment]
493
496
  unit="B",
494
497
  unit_scale=True,
495
498
  total=total,
@@ -500,71 +503,76 @@ def http_get(
500
503
  # see https://github.com/huggingface/huggingface_hub/pull/2000
501
504
  name="huggingface_hub.http_get",
502
505
  )
506
+ if _tqdm_bar is None
507
+ else contextlib.nullcontext(_tqdm_bar)
508
+ # ^ `contextlib.nullcontext` mimics a context manager that does nothing
509
+ # Makes it easier to use the same code path for both cases but in the latter
510
+ # case, the progress bar is not closed when exiting the context manager.
511
+ )
503
512
 
504
- if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
505
- supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
506
- if not supports_callback:
507
- warnings.warn(
508
- "You are using an outdated version of `hf_transfer`. "
509
- "Consider upgrading to latest version to enable progress bars "
510
- "using `pip install -U hf_transfer`."
511
- )
513
+ with progress_cm as progress:
514
+ if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
515
+ supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
516
+ if not supports_callback:
517
+ warnings.warn(
518
+ "You are using an outdated version of `hf_transfer`. "
519
+ "Consider upgrading to latest version to enable progress bars "
520
+ "using `pip install -U hf_transfer`."
521
+ )
522
+ try:
523
+ hf_transfer.download(
524
+ url=url,
525
+ filename=temp_file.name,
526
+ max_files=HF_TRANSFER_CONCURRENCY,
527
+ chunk_size=DOWNLOAD_CHUNK_SIZE,
528
+ headers=headers,
529
+ parallel_failures=3,
530
+ max_retries=5,
531
+ **({"callback": progress.update} if supports_callback else {}),
532
+ )
533
+ except Exception as e:
534
+ raise RuntimeError(
535
+ "An error occurred while downloading using `hf_transfer`. Consider"
536
+ " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
537
+ ) from e
538
+ if not supports_callback:
539
+ progress.update(total)
540
+ if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
541
+ raise EnvironmentError(
542
+ consistency_error_message.format(
543
+ actual_size=os.path.getsize(temp_file.name),
544
+ )
545
+ )
546
+ return
547
+ new_resume_size = resume_size
512
548
  try:
513
- hf_transfer.download(
549
+ for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
550
+ if chunk: # filter out keep-alive new chunks
551
+ progress.update(len(chunk))
552
+ temp_file.write(chunk)
553
+ new_resume_size += len(chunk)
554
+ # Some data has been downloaded from the server so we reset the number of retries.
555
+ _nb_retries = 5
556
+ except (requests.ConnectionError, requests.ReadTimeout) as e:
557
+ # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
558
+ # a transient error (network outage?). We log a warning message and try to resume the download a few times
559
+ # before giving up. The retry mechanism is basic but should be enough in most cases.
560
+ if _nb_retries <= 0:
561
+ logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
562
+ raise
563
+ logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
564
+ time.sleep(1)
565
+ reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects
566
+ return http_get(
514
567
  url=url,
515
- filename=temp_file.name,
516
- max_files=HF_TRANSFER_CONCURRENCY,
517
- chunk_size=DOWNLOAD_CHUNK_SIZE,
518
- headers=headers,
519
- parallel_failures=3,
520
- max_retries=5,
521
- **({"callback": progress.update} if supports_callback else {}),
522
- )
523
- except Exception as e:
524
- raise RuntimeError(
525
- "An error occurred while downloading using `hf_transfer`. Consider"
526
- " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
527
- ) from e
528
- if not supports_callback:
529
- progress.update(total)
530
- if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
531
- raise EnvironmentError(
532
- consistency_error_message.format(
533
- actual_size=os.path.getsize(temp_file.name),
534
- )
568
+ temp_file=temp_file,
569
+ proxies=proxies,
570
+ resume_size=new_resume_size,
571
+ headers=initial_headers,
572
+ expected_size=expected_size,
573
+ _nb_retries=_nb_retries - 1,
574
+ _tqdm_bar=_tqdm_bar,
535
575
  )
536
- return
537
- new_resume_size = resume_size
538
- try:
539
- for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
540
- if chunk: # filter out keep-alive new chunks
541
- progress.update(len(chunk))
542
- temp_file.write(chunk)
543
- new_resume_size += len(chunk)
544
- # Some data has been downloaded from the server so we reset the number of retries.
545
- _nb_retries = 5
546
- except (requests.ConnectionError, requests.ReadTimeout) as e:
547
- # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
548
- # a transient error (network outage?). We log a warning message and try to resume the download a few times
549
- # before giving up. Tre retry mechanism is basic but should be enough in most cases.
550
- if _nb_retries <= 0:
551
- logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
552
- raise
553
- logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
554
- time.sleep(1)
555
- reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects
556
- return http_get(
557
- url=url,
558
- temp_file=temp_file,
559
- proxies=proxies,
560
- resume_size=new_resume_size,
561
- headers=initial_headers,
562
- expected_size=expected_size,
563
- _nb_retries=_nb_retries - 1,
564
- _tqdm_bar=_tqdm_bar,
565
- )
566
-
567
- progress.close()
568
576
 
569
577
  if expected_size is not None and expected_size != temp_file.tell():
570
578
  raise EnvironmentError(
@@ -575,6 +583,7 @@ def http_get(
575
583
 
576
584
 
577
585
  @validate_hf_hub_args
586
+ @_deprecate_method(version="0.26", message="Use `hf_hub_download` instead.")
578
587
  def cached_download(
579
588
  url: str,
580
589
  *,
@@ -989,6 +998,14 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
989
998
  pass
990
999
 
991
1000
 
1001
+ @_deprecate_arguments(
1002
+ version="0.26.0",
1003
+ deprecated_args=["legacy_cache_layout"],
1004
+ custom_message=(
1005
+ "Legacy cache layout has been deprecated since August 2022 and will soon be removed. "
1006
+ "See https://huggingface.co/docs/huggingface_hub/guides/manage-cache for more details."
1007
+ ),
1008
+ )
992
1009
  @validate_hf_hub_args
993
1010
  def hf_hub_download(
994
1011
  repo_id: str,
@@ -1046,7 +1063,7 @@ def hf_hub_download(
1046
1063
  ```
1047
1064
 
1048
1065
  If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this
1049
- option, the `cache_dir` will not be used and a `.huggingface/` folder will be created at the root of `local_dir`
1066
+ option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir`
1050
1067
  to store some metadata related to the downloaded files. While this mechanism is not as robust as the main
1051
1068
  cache-system, it's optimized for regularly pulling the latest version of a repository.
1052
1069
 
@@ -1101,21 +1118,22 @@ def hf_hub_download(
1101
1118
  `str`: Local path of file or if networking is off, last version of file cached on disk.
1102
1119
 
1103
1120
  Raises:
1104
- - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
1105
- if `token=True` and the token cannot be found.
1106
- - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
1107
- if ETag cannot be determined.
1108
- - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
1109
- if some parameter value is invalid
1110
- - [`~utils.RepositoryNotFoundError`]
1111
- If the repository to download from cannot be found. This may be because it doesn't exist,
1112
- or because it is set to `private` and you do not have access.
1113
- - [`~utils.RevisionNotFoundError`]
1114
- If the revision to download from cannot be found.
1115
- - [`~utils.EntryNotFoundError`]
1116
- If the file to download cannot be found.
1117
- - [`~utils.LocalEntryNotFoundError`]
1118
- If network is disabled or unavailable and file is not found in cache.
1121
+ [`~utils.RepositoryNotFoundError`]
1122
+ If the repository to download from cannot be found. This may be because it doesn't exist,
1123
+ or because it is set to `private` and you do not have access.
1124
+ [`~utils.RevisionNotFoundError`]
1125
+ If the revision to download from cannot be found.
1126
+ [`~utils.EntryNotFoundError`]
1127
+ If the file to download cannot be found.
1128
+ [`~utils.LocalEntryNotFoundError`]
1129
+ If network is disabled or unavailable and file is not found in cache.
1130
+ [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
1131
+ If `token=True` but the token cannot be found.
1132
+ [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
1133
+ If ETag cannot be determined.
1134
+ [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
1135
+ If some parameter value is invalid.
1136
+
1119
1137
  """
1120
1138
  if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
1121
1139
  # Respect environment variable above user value
@@ -1208,10 +1226,11 @@ def hf_hub_download(
1208
1226
  filename=filename,
1209
1227
  revision=revision,
1210
1228
  # HTTP info
1211
- proxies=proxies,
1229
+ endpoint=endpoint,
1212
1230
  etag_timeout=etag_timeout,
1213
1231
  headers=headers,
1214
- endpoint=endpoint,
1232
+ proxies=proxies,
1233
+ token=token,
1215
1234
  # Additional options
1216
1235
  cache_dir=cache_dir,
1217
1236
  force_download=force_download,
@@ -1227,10 +1246,11 @@ def hf_hub_download(
1227
1246
  repo_type=repo_type,
1228
1247
  revision=revision,
1229
1248
  # HTTP info
1249
+ endpoint=endpoint,
1250
+ etag_timeout=etag_timeout,
1230
1251
  headers=headers,
1231
1252
  proxies=proxies,
1232
- etag_timeout=etag_timeout,
1233
- endpoint=endpoint,
1253
+ token=token,
1234
1254
  # Additional options
1235
1255
  local_files_only=local_files_only,
1236
1256
  force_download=force_download,
@@ -1247,10 +1267,11 @@ def _hf_hub_download_to_cache_dir(
1247
1267
  repo_type: str,
1248
1268
  revision: str,
1249
1269
  # HTTP info
1270
+ endpoint: Optional[str],
1271
+ etag_timeout: float,
1250
1272
  headers: Dict[str, str],
1251
1273
  proxies: Optional[Dict],
1252
- etag_timeout: float,
1253
- endpoint: Optional[str],
1274
+ token: Optional[Union[bool, str]],
1254
1275
  # Additional options
1255
1276
  local_files_only: bool,
1256
1277
  force_download: bool,
@@ -1288,6 +1309,7 @@ def _hf_hub_download_to_cache_dir(
1288
1309
  proxies=proxies,
1289
1310
  etag_timeout=etag_timeout,
1290
1311
  headers=headers,
1312
+ token=token,
1291
1313
  local_files_only=local_files_only,
1292
1314
  storage_folder=storage_folder,
1293
1315
  relative_filename=relative_filename,
@@ -1355,7 +1377,7 @@ def _hf_hub_download_to_cache_dir(
1355
1377
  lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
1356
1378
 
1357
1379
  # Some Windows versions do not allow for paths longer than 255 characters.
1358
- # In this case, we must specify it is an extended path by using the "\\?\" prefix.
1380
+ # In this case, we must specify it as an extended path by using the "\\?\" prefix.
1359
1381
  if os.name == "nt" and len(os.path.abspath(lock_path)) > 255:
1360
1382
  lock_path = "\\\\?\\" + os.path.abspath(lock_path)
1361
1383
 
@@ -1389,10 +1411,11 @@ def _hf_hub_download_to_local_dir(
1389
1411
  filename: str,
1390
1412
  revision: str,
1391
1413
  # HTTP info
1392
- proxies: Optional[Dict],
1414
+ endpoint: Optional[str],
1393
1415
  etag_timeout: float,
1394
1416
  headers: Dict[str, str],
1395
- endpoint: Optional[str],
1417
+ proxies: Optional[Dict],
1418
+ token: Union[bool, str, None],
1396
1419
  # Additional options
1397
1420
  cache_dir: str,
1398
1421
  force_download: bool,
@@ -1402,6 +1425,10 @@ def _hf_hub_download_to_local_dir(
1402
1425
 
1403
1426
  Method should not be called directly. Please use `hf_hub_download` instead.
1404
1427
  """
1428
+ # Some Windows versions do not allow for paths longer than 255 characters.
1429
+ # In this case, we must specify it as an extended path by using the "\\?\" prefix.
1430
+ if os.name == "nt" and len(os.path.abspath(local_dir)) > 255:
1431
+ local_dir = "\\\\?\\" + os.path.abspath(local_dir)
1405
1432
  local_dir = Path(local_dir)
1406
1433
  paths = get_local_download_paths(local_dir=local_dir, filename=filename)
1407
1434
  local_metadata = read_download_metadata(local_dir=local_dir, filename=filename)
@@ -1426,6 +1453,7 @@ def _hf_hub_download_to_local_dir(
1426
1453
  proxies=proxies,
1427
1454
  etag_timeout=etag_timeout,
1428
1455
  headers=headers,
1456
+ token=token,
1429
1457
  local_files_only=local_files_only,
1430
1458
  )
1431
1459
 
@@ -1677,6 +1705,7 @@ def _get_metadata_or_catch_error(
1677
1705
  proxies: Optional[Dict],
1678
1706
  etag_timeout: Optional[float],
1679
1707
  headers: Dict[str, str], # mutated inplace!
1708
+ token: Union[bool, str, None],
1680
1709
  local_files_only: bool,
1681
1710
  relative_filename: Optional[str] = None, # only used to store `.no_exists` in cache
1682
1711
  storage_folder: Optional[str] = None, # only used to store `.no_exists` in cache
@@ -1719,7 +1748,9 @@ def _get_metadata_or_catch_error(
1719
1748
  if not local_files_only:
1720
1749
  try:
1721
1750
  try:
1722
- metadata = get_hf_file_metadata(url=url, proxies=proxies, timeout=etag_timeout, headers=headers)
1751
+ metadata = get_hf_file_metadata(
1752
+ url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token
1753
+ )
1723
1754
  except EntryNotFoundError as http_error:
1724
1755
  if storage_folder is not None and relative_filename is not None:
1725
1756
  # Cache the non-existence of the file
@@ -1921,7 +1952,12 @@ def _chmod_and_move(src: Path, dst: Path) -> None:
1921
1952
  cache_dir_mode = Path(tmp_file).stat().st_mode
1922
1953
  os.chmod(str(src), stat.S_IMODE(cache_dir_mode))
1923
1954
  finally:
1924
- tmp_file.unlink()
1955
+ try:
1956
+ tmp_file.unlink()
1957
+ except OSError:
1958
+ # fails if `tmp_file.touch()` failed => do nothing
1959
+ # See https://github.com/huggingface/huggingface_hub/issues/2359
1960
+ pass
1925
1961
 
1926
1962
  shutil.move(str(src), str(dst))
1927
1963