huggingface-hub 1.0.0rc5__py3-none-any.whl → 1.0.0rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (36) hide show
  1. huggingface_hub/__init__.py +12 -1
  2. huggingface_hub/_commit_api.py +1 -5
  3. huggingface_hub/_jobs_api.py +1 -1
  4. huggingface_hub/_login.py +3 -3
  5. huggingface_hub/_snapshot_download.py +4 -3
  6. huggingface_hub/_upload_large_folder.py +2 -15
  7. huggingface_hub/_webhooks_server.py +1 -1
  8. huggingface_hub/cli/_cli_utils.py +1 -1
  9. huggingface_hub/cli/auth.py +0 -20
  10. huggingface_hub/cli/cache.py +561 -304
  11. huggingface_hub/cli/download.py +2 -2
  12. huggingface_hub/cli/repo.py +0 -7
  13. huggingface_hub/cli/upload.py +0 -8
  14. huggingface_hub/community.py +16 -8
  15. huggingface_hub/constants.py +10 -11
  16. huggingface_hub/file_download.py +9 -61
  17. huggingface_hub/hf_api.py +170 -126
  18. huggingface_hub/hf_file_system.py +31 -6
  19. huggingface_hub/inference/_client.py +1 -1
  20. huggingface_hub/inference/_generated/_async_client.py +1 -1
  21. huggingface_hub/inference/_providers/__init__.py +15 -2
  22. huggingface_hub/inference/_providers/_common.py +39 -0
  23. huggingface_hub/inference/_providers/clarifai.py +13 -0
  24. huggingface_hub/lfs.py +3 -65
  25. huggingface_hub/serialization/_torch.py +1 -1
  26. huggingface_hub/utils/__init__.py +0 -2
  27. huggingface_hub/utils/_cache_manager.py +17 -42
  28. huggingface_hub/utils/_http.py +25 -3
  29. huggingface_hub/utils/_parsing.py +98 -0
  30. huggingface_hub/utils/_runtime.py +1 -14
  31. {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/METADATA +4 -14
  32. {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/RECORD +36 -34
  33. {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/LICENSE +0 -0
  34. {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/WHEEL +0 -0
  35. {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/entry_points.txt +0 -0
  36. {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/top_level.txt +0 -0
@@ -134,7 +134,7 @@ def download(
134
134
  force_download=force_download,
135
135
  token=token,
136
136
  local_dir=local_dir,
137
- library_name="hf",
137
+ library_name="huggingface-cli",
138
138
  dry_run=dry_run,
139
139
  )
140
140
 
@@ -156,7 +156,7 @@ def download(
156
156
  cache_dir=cache_dir,
157
157
  token=token,
158
158
  local_dir=local_dir,
159
- library_name="hf",
159
+ library_name="huggingface-cli",
160
160
  max_workers=max_workers,
161
161
  dry_run=dry_run,
162
162
  )
@@ -147,12 +147,6 @@ def repo_settings(
147
147
  help="Whether the repository should be private.",
148
148
  ),
149
149
  ] = None,
150
- xet_enabled: Annotated[
151
- Optional[bool],
152
- typer.Option(
153
- help=" Whether the repository should be enabled for Xet Storage.",
154
- ),
155
- ] = None,
156
150
  token: TokenOpt = None,
157
151
  repo_type: RepoTypeOpt = RepoType.model,
158
152
  ) -> None:
@@ -161,7 +155,6 @@ def repo_settings(
161
155
  repo_id=repo_id,
162
156
  gated=(gated.value if gated else None), # type: ignore [arg-type]
163
157
  private=private,
164
- xet_enabled=xet_enabled,
165
158
  repo_type=repo_type.value,
166
159
  )
167
160
  print(f"Successfully updated the settings of {ANSI.bold(repo_id)} on the Hub.")
@@ -55,10 +55,8 @@ import typer
55
55
 
56
56
  from huggingface_hub import logging
57
57
  from huggingface_hub._commit_scheduler import CommitScheduler
58
- from huggingface_hub.constants import HF_HUB_ENABLE_HF_TRANSFER
59
58
  from huggingface_hub.errors import RevisionNotFoundError
60
59
  from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
61
- from huggingface_hub.utils._runtime import is_xet_available
62
60
 
63
61
  from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
64
62
 
@@ -156,12 +154,6 @@ def upload(
156
154
  if delete is not None and len(delete) > 0:
157
155
  warnings.warn("Ignoring --delete since a single file is uploaded.")
158
156
 
159
- if not is_xet_available() and not HF_HUB_ENABLE_HF_TRANSFER:
160
- logger.info(
161
- "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See"
162
- " https://huggingface.co/docs/huggingface_hub/hf_transfer for more details."
163
- )
164
-
165
157
  # Schedule commits if `every` is set
166
158
  if every is not None:
167
159
  if os.path.isfile(resolved_local_path):
@@ -7,7 +7,7 @@ for more information on Pull Requests, Discussions, and the community tab.
7
7
 
8
8
  from dataclasses import dataclass
9
9
  from datetime import datetime
10
- from typing import Literal, Optional, Union
10
+ from typing import Literal, Optional, TypedDict, Union
11
11
 
12
12
  from . import constants
13
13
  from .utils import parse_datetime
@@ -143,6 +143,14 @@ class DiscussionWithDetails(Discussion):
143
143
  diff: Optional[str]
144
144
 
145
145
 
146
+ class DiscussionEventArgs(TypedDict):
147
+ id: str
148
+ type: str
149
+ created_at: datetime
150
+ author: str
151
+ _event: dict
152
+
153
+
146
154
  @dataclass
147
155
  class DiscussionEvent:
148
156
  """
@@ -319,13 +327,13 @@ def deserialize_event(event: dict) -> DiscussionEvent:
319
327
  event_type: str = event["type"]
320
328
  created_at = parse_datetime(event["createdAt"])
321
329
 
322
- common_args = dict(
323
- id=event_id,
324
- type=event_type,
325
- created_at=created_at,
326
- author=event.get("author", {}).get("name", "deleted"),
327
- _event=event,
328
- )
330
+ common_args: DiscussionEventArgs = {
331
+ "id": event_id,
332
+ "type": event_type,
333
+ "created_at": created_at,
334
+ "author": event.get("author", {}).get("name", "deleted"),
335
+ "_event": event,
336
+ }
329
337
 
330
338
  if event_type == "comment":
331
339
  return DiscussionComment(
@@ -35,7 +35,6 @@ DEFAULT_ETAG_TIMEOUT = 10
35
35
  DEFAULT_DOWNLOAD_TIMEOUT = 10
36
36
  DEFAULT_REQUEST_TIMEOUT = 10
37
37
  DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
38
- HF_TRANSFER_CONCURRENCY = 100
39
38
  MAX_HTTP_DOWNLOAD_SIZE = 50 * 1000 * 1000 * 1000 # 50 GB
40
39
 
41
40
  # Constants for serialization
@@ -215,18 +214,18 @@ HF_HUB_DISABLE_EXPERIMENTAL_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISA
215
214
  # Disable sending the cached token by default in all HTTP requests to the Hub
216
215
  HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN"))
217
216
 
218
- # Enable fast-download using external dependency "hf_transfer"
219
- # See:
220
- # - https://pypi.org/project/hf-transfer/
221
- # - https://github.com/huggingface/hf_transfer (private)
222
- HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))
217
+ HF_XET_HIGH_PERFORMANCE: bool = _is_true(os.environ.get("HF_XET_HIGH_PERFORMANCE"))
223
218
 
219
+ # hf_transfer is not used anymore. Let's warn the user in case they set the env variable
220
+ if _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER")) and not HF_XET_HIGH_PERFORMANCE:
221
+ import warnings
224
222
 
225
- # UNUSED
226
- # We don't use symlinks in local dir anymore.
227
- HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD: int = (
228
- _as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024
229
- )
223
+ warnings.warn(
224
+ "The `HF_HUB_ENABLE_HF_TRANSFER` environment variable is deprecated as 'hf_transfer' is not used anymore. "
225
+ "Please use `HF_XET_HIGH_PERFORMANCE` instead to enable high performance transfer with Xet. "
226
+ "Visit https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfxethighperformance for more details.",
227
+ DeprecationWarning,
228
+ )
230
229
 
231
230
  # Used to override the etag timeout on a system level
232
231
  HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
@@ -379,38 +379,16 @@ def http_get(
379
379
  # If the file is already fully downloaded, we don't need to download it again.
380
380
  return
381
381
 
382
- has_custom_range_header = headers is not None and any(h.lower() == "range" for h in headers)
383
- hf_transfer = None
384
- if constants.HF_HUB_ENABLE_HF_TRANSFER:
385
- if resume_size != 0:
386
- warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
387
- elif has_custom_range_header:
388
- warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method")
389
- else:
390
- try:
391
- import hf_transfer # type: ignore[no-redef]
392
- except ImportError:
393
- raise ValueError(
394
- "Fast download using 'hf_transfer' is enabled"
395
- " (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not"
396
- " available in your environment. Try `pip install hf_transfer`."
397
- )
398
-
399
382
  initial_headers = headers
400
383
  headers = copy.deepcopy(headers) or {}
401
384
  if resume_size > 0:
402
385
  headers["Range"] = _adjust_range_header(headers.get("Range"), resume_size)
403
386
  elif expected_size and expected_size > constants.MAX_HTTP_DOWNLOAD_SIZE:
404
- # Any files over 50GB will not be available through basic http request.
405
- # Setting the range header to 0-0 will force the server to return the file size in the Content-Range header.
406
- # Since hf_transfer splits the download into chunks, the process will succeed afterwards.
407
- if hf_transfer:
408
- headers["Range"] = "bytes=0-0"
409
- else:
410
- raise ValueError(
411
- "The file is too large to be downloaded using the regular download method. Use `hf_transfer` or `hf_xet` instead."
412
- " Try `pip install hf_transfer` or `pip install hf_xet`."
413
- )
387
+ # Any files over 50GB will not be available through basic http requests.
388
+ raise ValueError(
389
+ "The file is too large to be downloaded using the regular download method. "
390
+ " Install `hf_xet` with `pip install hf_xet` for xet-powered downloads."
391
+ )
414
392
 
415
393
  with http_stream_backoff(
416
394
  method="GET",
@@ -451,31 +429,6 @@ def http_get(
451
429
  )
452
430
 
453
431
  with progress_cm as progress:
454
- if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
455
- try:
456
- hf_transfer.download(
457
- url=url,
458
- filename=temp_file.name,
459
- max_files=constants.HF_TRANSFER_CONCURRENCY,
460
- chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
461
- headers=initial_headers,
462
- parallel_failures=3,
463
- max_retries=5,
464
- callback=progress.update,
465
- )
466
- except Exception as e:
467
- raise RuntimeError(
468
- "An error occurred while downloading using `hf_transfer`. Consider"
469
- " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
470
- ) from e
471
- if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
472
- raise EnvironmentError(
473
- consistency_error_message.format(
474
- actual_size=os.path.getsize(temp_file.name),
475
- )
476
- )
477
- return
478
-
479
432
  new_resume_size = resume_size
480
433
  try:
481
434
  for chunk in response.iter_bytes(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
@@ -1780,7 +1733,7 @@ def _download_to_tmp_and_move(
1780
1733
  Internal logic:
1781
1734
  - return early if file is already downloaded
1782
1735
  - resume download if possible (from incomplete file)
1783
- - do not resume download if `force_download=True` or `HF_HUB_ENABLE_HF_TRANSFER=True`
1736
+ - do not resume download if `force_download=True`
1784
1737
  - check disk space before downloading
1785
1738
  - download content to a temporary file
1786
1739
  - set correct permissions on temporary file
@@ -1792,16 +1745,11 @@ def _download_to_tmp_and_move(
1792
1745
  # Do nothing if already exists (except if force_download=True)
1793
1746
  return
1794
1747
 
1795
- if incomplete_path.exists() and (force_download or constants.HF_HUB_ENABLE_HF_TRANSFER):
1748
+ if incomplete_path.exists() and force_download:
1796
1749
  # By default, we will try to resume the download if possible.
1797
- # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
1750
+ # However, if the user has set `force_download=True`, then we should
1798
1751
  # not resume the download => delete the incomplete file.
1799
- message = f"Removing incomplete file '{incomplete_path}'"
1800
- if force_download:
1801
- message += " (force_download=True)"
1802
- elif constants.HF_HUB_ENABLE_HF_TRANSFER:
1803
- message += " (hf_transfer=True)"
1804
- logger.info(message)
1752
+ logger.info(f"Removing incomplete file '{incomplete_path}' (force_download=True)")
1805
1753
  incomplete_path.unlink(missing_ok=True)
1806
1754
 
1807
1755
  with incomplete_path.open("ab") as f: