huggingface-hub 0.29.3rc0__py3-none-any.whl → 0.30.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (40)
  1. huggingface_hub/__init__.py +16 -1
  2. huggingface_hub/_commit_api.py +142 -4
  3. huggingface_hub/_space_api.py +15 -2
  4. huggingface_hub/_webhooks_server.py +2 -0
  5. huggingface_hub/commands/delete_cache.py +66 -20
  6. huggingface_hub/commands/upload.py +16 -2
  7. huggingface_hub/constants.py +44 -7
  8. huggingface_hub/errors.py +19 -0
  9. huggingface_hub/file_download.py +163 -35
  10. huggingface_hub/hf_api.py +349 -28
  11. huggingface_hub/hub_mixin.py +19 -4
  12. huggingface_hub/inference/_client.py +50 -69
  13. huggingface_hub/inference/_generated/_async_client.py +57 -76
  14. huggingface_hub/inference/_generated/types/__init__.py +1 -0
  15. huggingface_hub/inference/_generated/types/chat_completion.py +20 -10
  16. huggingface_hub/inference/_generated/types/image_to_image.py +2 -0
  17. huggingface_hub/inference/_providers/__init__.py +7 -1
  18. huggingface_hub/inference/_providers/_common.py +9 -5
  19. huggingface_hub/inference/_providers/black_forest_labs.py +5 -5
  20. huggingface_hub/inference/_providers/cohere.py +1 -1
  21. huggingface_hub/inference/_providers/fal_ai.py +64 -7
  22. huggingface_hub/inference/_providers/fireworks_ai.py +4 -1
  23. huggingface_hub/inference/_providers/hf_inference.py +41 -4
  24. huggingface_hub/inference/_providers/hyperbolic.py +3 -3
  25. huggingface_hub/inference/_providers/nebius.py +3 -3
  26. huggingface_hub/inference/_providers/novita.py +35 -5
  27. huggingface_hub/inference/_providers/openai.py +22 -0
  28. huggingface_hub/inference/_providers/replicate.py +3 -3
  29. huggingface_hub/inference/_providers/together.py +3 -3
  30. huggingface_hub/utils/__init__.py +8 -0
  31. huggingface_hub/utils/_http.py +4 -1
  32. huggingface_hub/utils/_runtime.py +11 -0
  33. huggingface_hub/utils/_xet.py +199 -0
  34. huggingface_hub/utils/tqdm.py +30 -2
  35. {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc0.dist-info}/METADATA +3 -1
  36. {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc0.dist-info}/RECORD +40 -38
  37. {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc0.dist-info}/LICENSE +0 -0
  38. {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc0.dist-info}/WHEEL +0 -0
  39. {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc0.dist-info}/entry_points.txt +0 -0
  40. {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/errors.py CHANGED
@@ -327,3 +327,22 @@ class DDUFExportError(DDUFError):
327
327
 
328
328
  class DDUFInvalidEntryNameError(DDUFExportError):
329
329
  """Exception thrown when the entry name is invalid."""
330
+
331
+
332
+ # XET ERRORS
333
+
334
+
335
+ class XetError(Exception):
336
+ """Base exception for errors related to Xet Storage."""
337
+
338
+
339
+ class XetAuthorizationError(XetError):
340
+ """Exception thrown when the user does not have the right authorization to use Xet Storage."""
341
+
342
+
343
+ class XetRefreshTokenError(XetError):
344
+ """Exception thrown when the refresh token is invalid."""
345
+
346
+
347
+ class XetDownloadError(Exception):
348
+ """Exception thrown when the download from Xet Storage fails."""
huggingface_hub/file_download.py CHANGED
@@ -1,4 +1,3 @@
1
- import contextlib
2
1
  import copy
3
2
  import errno
4
3
  import inspect
@@ -38,6 +37,7 @@ from .utils import (
38
37
  OfflineModeIsEnabled,
39
38
  SoftTemporaryDirectory,
40
39
  WeakFileLock,
40
+ XetFileData,
41
41
  build_hf_headers,
42
42
  get_fastai_version, # noqa: F401 # for backward compatibility
43
43
  get_fastcore_version, # noqa: F401 # for backward compatibility
@@ -56,15 +56,17 @@ from .utils import (
56
56
  is_tf_available, # noqa: F401 # for backward compatibility
57
57
  is_torch_available, # noqa: F401 # for backward compatibility
58
58
  logging,
59
+ parse_xet_file_data_from_response,
60
+ refresh_xet_connection_info,
59
61
  reset_sessions,
60
62
  tqdm,
61
63
  validate_hf_hub_args,
62
64
  )
63
65
  from .utils._http import _adjust_range_header
64
- from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility
66
+ from .utils._runtime import _PY_VERSION, is_xet_available # noqa: F401 # for backward compatibility
65
67
  from .utils._typing import HTTP_METHOD_T
66
68
  from .utils.sha import sha_fileobj
67
- from .utils.tqdm import is_tqdm_disabled
69
+ from .utils.tqdm import _get_progress_bar_context
68
70
 
69
71
 
70
72
  logger = logging.get_logger(__name__)
@@ -160,12 +162,15 @@ class HfFileMetadata:
160
162
  size (`size`):
161
163
  Size of the file. In case of an LFS file, contains the size of the actual
162
164
  LFS file, not the pointer.
165
+ xet_file_data (`XetFileData`, *optional*):
166
+ Xet information for the file. This is only set if the file is stored using Xet storage.
163
167
  """
164
168
 
165
169
  commit_hash: Optional[str]
166
170
  etag: Optional[str]
167
171
  location: str
168
172
  size: Optional[int]
173
+ xet_file_data: Optional[XetFileData]
169
174
 
170
175
 
171
176
  @validate_hf_hub_args
@@ -396,23 +401,13 @@ def http_get(
396
401
  f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
397
402
  " Please retry with `force_download=True`."
398
403
  )
399
-
400
- # Stream file to buffer
401
- progress_cm: tqdm = (
402
- tqdm( # type: ignore[assignment]
403
- unit="B",
404
- unit_scale=True,
405
- total=total,
406
- initial=resume_size,
407
- desc=displayed_filename,
408
- disable=is_tqdm_disabled(logger.getEffectiveLevel()),
409
- name="huggingface_hub.http_get",
410
- )
411
- if _tqdm_bar is None
412
- else contextlib.nullcontext(_tqdm_bar)
413
- # ^ `contextlib.nullcontext` mimics a context manager that does nothing
414
- # Makes it easier to use the same code path for both cases but in the later
415
- # case, the progress bar is not closed when exiting the context manager.
404
+ progress_cm = _get_progress_bar_context(
405
+ desc=displayed_filename,
406
+ log_level=logger.getEffectiveLevel(),
407
+ total=total,
408
+ initial=resume_size,
409
+ name="huggingface_hub.http_get",
410
+ _tqdm_bar=_tqdm_bar,
416
411
  )
417
412
 
418
413
  with progress_cm as progress:
@@ -487,6 +482,110 @@ def http_get(
487
482
  )
488
483
 
489
484
 
485
+ def xet_get(
486
+ *,
487
+ incomplete_path: Path,
488
+ xet_file_data: XetFileData,
489
+ headers: Dict[str, str],
490
+ expected_size: Optional[int] = None,
491
+ displayed_filename: Optional[str] = None,
492
+ _tqdm_bar: Optional[tqdm] = None,
493
+ ) -> None:
494
+ """
495
+ Download a file using Xet storage service.
496
+
497
+ Args:
498
+ incomplete_path (`Path`):
499
+ The path to the file to download.
500
+ xet_file_data (`XetFileData`):
501
+ The file metadata needed to make the request to the xet storage service.
502
+ headers (`Dict[str, str]`):
503
+ The headers to send to the xet storage service.
504
+ expected_size (`int`, *optional*):
505
+ The expected size of the file to download. If set, the download will raise an error if the size of the
506
+ received content is different from the expected one.
507
+ displayed_filename (`str`, *optional*):
508
+ The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If
509
+ not set, the filename is guessed from the URL or the `Content-Disposition` header.
510
+
511
+ **How it works:**
512
+ The file download system uses Xet storage, which is a content-addressable storage system that breaks files into chunks
513
+ for efficient storage and transfer.
514
+
515
+ `hf_xet.download_files` manages downloading files by:
516
+ - Taking a list of files to download (each with its unique content hash)
517
+ - Connecting to a storage server (CAS server) that knows how files are chunked
518
+ - Using authentication to ensure secure access
519
+ - Providing progress updates during download
520
+
521
+ Authentication works by regularly refreshing access tokens through `refresh_xet_connection_info` to maintain a valid
522
+ connection to the storage server.
523
+
524
+ The download process works like this:
525
+ 1. Create a local cache folder at `~/.cache/huggingface/xet/chunk-cache` to store reusable file chunks
526
+ 2. Download files in parallel:
527
+ 2.1. Prepare to write the file to disk
528
+ 2.2. Ask the server "how is this file split into chunks?" using the file's unique hash
529
+ The server responds with:
530
+ - Which chunks make up the complete file
531
+ - Where each chunk can be downloaded from
532
+ 2.3. For each needed chunk:
533
+ - Checks if we already have it in our local cache
534
+ - If not, download it from cloud storage (S3)
535
+ - Save it to cache for future use
536
+ - Assemble the chunks in order to recreate the original file
537
+
538
+ """
539
+ try:
540
+ from hf_xet import PyPointerFile, download_files # type: ignore[no-redef]
541
+ except ImportError:
542
+ raise ValueError(
543
+ "To use optimized download using Xet storage, you need to install the hf_xet package. "
544
+ "Try `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`."
545
+ )
546
+
547
+ connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
548
+
549
+ def token_refresher() -> Tuple[str, int]:
550
+ connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
551
+ if connection_info is None:
552
+ raise ValueError("Failed to refresh token using xet metadata.")
553
+ return connection_info.access_token, connection_info.expiration_unix_epoch
554
+
555
+ pointer_files = [
556
+ PyPointerFile(path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, filesize=expected_size)
557
+ ]
558
+
559
+ if not displayed_filename:
560
+ displayed_filename = incomplete_path.name
561
+
562
+ # Truncate filename if too long to display
563
+ if len(displayed_filename) > 40:
564
+ displayed_filename = f"{displayed_filename[:40]}(…)"
565
+
566
+ progress_cm = _get_progress_bar_context(
567
+ desc=displayed_filename,
568
+ log_level=logger.getEffectiveLevel(),
569
+ total=expected_size,
570
+ initial=0,
571
+ name="huggingface_hub.xet_get",
572
+ _tqdm_bar=_tqdm_bar,
573
+ )
574
+
575
+ with progress_cm as progress:
576
+
577
+ def progress_updater(progress_bytes: float):
578
+ progress.update(progress_bytes)
579
+
580
+ download_files(
581
+ pointer_files,
582
+ endpoint=connection_info.endpoint,
583
+ token_info=(connection_info.access_token, connection_info.expiration_unix_epoch),
584
+ token_refresher=token_refresher,
585
+ progress_updater=[progress_updater],
586
+ )
587
+
588
+
490
589
  def _normalize_etag(etag: Optional[str]) -> Optional[str]:
491
590
  """Normalize ETag HTTP header, so it can be used to create nice filepaths.
492
591
 
@@ -922,7 +1021,7 @@ def _hf_hub_download_to_cache_dir(
922
1021
 
923
1022
  # Try to get metadata (etag, commit_hash, url, size) from the server.
924
1023
  # If we can't, a HEAD request error is returned.
925
- (url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error(
1024
+ (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error(
926
1025
  repo_id=repo_id,
927
1026
  filename=filename,
928
1027
  repo_type=repo_type,
@@ -1006,6 +1105,8 @@ def _hf_hub_download_to_cache_dir(
1006
1105
  if os.name == "nt" and len(os.path.abspath(blob_path)) > 255:
1007
1106
  blob_path = "\\\\?\\" + os.path.abspath(blob_path)
1008
1107
 
1108
+ # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
1109
+
1009
1110
  Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
1010
1111
  with WeakFileLock(lock_path):
1011
1112
  _download_to_tmp_and_move(
@@ -1017,6 +1118,8 @@ def _hf_hub_download_to_cache_dir(
1017
1118
  expected_size=expected_size,
1018
1119
  filename=filename,
1019
1120
  force_download=force_download,
1121
+ etag=etag,
1122
+ xet_file_data=xet_file_data,
1020
1123
  )
1021
1124
  if not os.path.exists(pointer_path):
1022
1125
  _create_symlink(blob_path, pointer_path, new_blob=True)
@@ -1067,7 +1170,7 @@ def _hf_hub_download_to_local_dir(
1067
1170
  return str(paths.file_path)
1068
1171
 
1069
1172
  # Local file doesn't exist or commit_hash doesn't match => we need the etag
1070
- (url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error(
1173
+ (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error(
1071
1174
  repo_id=repo_id,
1072
1175
  filename=filename,
1073
1176
  repo_type=repo_type,
@@ -1144,6 +1247,8 @@ def _hf_hub_download_to_local_dir(
1144
1247
  expected_size=expected_size,
1145
1248
  filename=filename,
1146
1249
  force_download=force_download,
1250
+ etag=etag,
1251
+ xet_file_data=xet_file_data,
1147
1252
  )
1148
1253
 
1149
1254
  write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
@@ -1317,6 +1422,7 @@ def get_hf_file_metadata(
1317
1422
  size=_int_or_none(
1318
1423
  r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
1319
1424
  ),
1425
+ xet_file_data=parse_xet_file_data_from_response(r), # type: ignore
1320
1426
  )
1321
1427
 
1322
1428
 
@@ -1336,10 +1442,10 @@ def _get_metadata_or_catch_error(
1336
1442
  storage_folder: Optional[str] = None, # only used to store `.no_exists` in cache
1337
1443
  ) -> Union[
1338
1444
  # Either an exception is caught and returned
1339
- Tuple[None, None, None, None, Exception],
1445
+ Tuple[None, None, None, None, None, Exception],
1340
1446
  # Or the metadata is returned as
1341
- # `(url_to_download, etag, commit_hash, expected_size, None)`
1342
- Tuple[str, str, str, int, None],
1447
+ # `(url_to_download, etag, commit_hash, expected_size, xet_file_data, None)`
1448
+ Tuple[str, str, str, int, Optional[XetFileData], None],
1343
1449
  ]:
1344
1450
  """Get metadata for a file on the Hub, safely handling network issues.
1345
1451
 
@@ -1356,6 +1462,7 @@ def _get_metadata_or_catch_error(
1356
1462
  None,
1357
1463
  None,
1358
1464
  None,
1465
+ None,
1359
1466
  OfflineModeIsEnabled(
1360
1467
  f"Cannot access file since 'local_files_only=True' as been set. (repo_id: {repo_id}, repo_type: {repo_type}, revision: {revision}, filename: {filename})"
1361
1468
  ),
@@ -1367,6 +1474,7 @@ def _get_metadata_or_catch_error(
1367
1474
  commit_hash: Optional[str] = None
1368
1475
  expected_size: Optional[int] = None
1369
1476
  head_error_call: Optional[Exception] = None
1477
+ xet_file_data: Optional[XetFileData] = None
1370
1478
 
1371
1479
  # Try to get metadata from the server.
1372
1480
  # Do not raise yet if the file is not found or not accessible.
@@ -1414,13 +1522,15 @@ def _get_metadata_or_catch_error(
1414
1522
  if expected_size is None:
1415
1523
  raise FileMetadataError("Distant resource does not have a Content-Length.")
1416
1524
 
1525
+ xet_file_data = metadata.xet_file_data
1526
+
1417
1527
  # In case of a redirect, save an extra redirect on the request.get call,
1418
1528
  # and ensure we download the exact atomic version even if it changed
1419
1529
  # between the HEAD and the GET (unlikely, but hey).
1420
1530
  #
1421
1531
  # If url domain is different => we are downloading from a CDN => url is signed => don't send auth
1422
1532
  # If url domain is the same => redirect due to repo rename AND downloading a regular file => keep auth
1423
- if url != metadata.location:
1533
+ if xet_file_data is None and url != metadata.location:
1424
1534
  url_to_download = metadata.location
1425
1535
  if urlparse(url).netloc != urlparse(metadata.location).netloc:
1426
1536
  # Remove authorization header when downloading a LFS blob
@@ -1458,7 +1568,7 @@ def _get_metadata_or_catch_error(
1458
1568
  if not (local_files_only or etag is not None or head_error_call is not None):
1459
1569
  raise RuntimeError("etag is empty due to uncovered problems")
1460
1570
 
1461
- return (url_to_download, etag, commit_hash, expected_size, head_error_call) # type: ignore [return-value]
1571
+ return (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_error_call) # type: ignore [return-value]
1462
1572
 
1463
1573
 
1464
1574
  def _raise_on_head_call_error(head_call_error: Exception, force_download: bool, local_files_only: bool) -> NoReturn:
@@ -1502,6 +1612,8 @@ def _download_to_tmp_and_move(
1502
1612
  expected_size: Optional[int],
1503
1613
  filename: str,
1504
1614
  force_download: bool,
1615
+ etag: Optional[str],
1616
+ xet_file_data: Optional[XetFileData],
1505
1617
  ) -> None:
1506
1618
  """Download content from a URL to a destination path.
1507
1619
 
@@ -1544,14 +1656,30 @@ def _download_to_tmp_and_move(
1544
1656
  _check_disk_space(expected_size, incomplete_path.parent)
1545
1657
  _check_disk_space(expected_size, destination_path.parent)
1546
1658
 
1547
- http_get(
1548
- url_to_download,
1549
- f,
1550
- proxies=proxies,
1551
- resume_size=resume_size,
1552
- headers=headers,
1553
- expected_size=expected_size,
1554
- )
1659
+ if xet_file_data is not None and is_xet_available():
1660
+ logger.info("Xet Storage is enabled for this repo. Downloading file from Xet Storage..")
1661
+ xet_get(
1662
+ incomplete_path=incomplete_path,
1663
+ xet_file_data=xet_file_data,
1664
+ headers=headers,
1665
+ expected_size=expected_size,
1666
+ displayed_filename=filename,
1667
+ )
1668
+ else:
1669
+ if xet_file_data is not None:
1670
+ logger.warning(
1671
+ "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. "
1672
+ "Falling back to regular HTTP download. "
1673
+ "For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`"
1674
+ )
1675
+ http_get(
1676
+ url_to_download,
1677
+ f,
1678
+ proxies=proxies,
1679
+ resume_size=resume_size,
1680
+ headers=headers,
1681
+ expected_size=expected_size,
1682
+ )
1555
1683
 
1556
1684
  logger.info(f"Download complete. Moving file to {destination_path}")
1557
1685
  _chmod_and_move(incomplete_path, destination_path)