huggingface-hub 0.33.5__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of huggingface-hub has been flagged as potentially problematic.
Files changed (68)
  1. huggingface_hub/__init__.py +487 -525
  2. huggingface_hub/_commit_api.py +21 -28
  3. huggingface_hub/_jobs_api.py +145 -0
  4. huggingface_hub/_local_folder.py +7 -1
  5. huggingface_hub/_login.py +5 -5
  6. huggingface_hub/_oauth.py +6 -10
  7. huggingface_hub/_snapshot_download.py +11 -6
  8. huggingface_hub/_upload_large_folder.py +46 -23
  9. huggingface_hub/cli/__init__.py +27 -0
  10. huggingface_hub/cli/_cli_utils.py +69 -0
  11. huggingface_hub/cli/auth.py +210 -0
  12. huggingface_hub/cli/cache.py +405 -0
  13. huggingface_hub/cli/download.py +181 -0
  14. huggingface_hub/cli/hf.py +66 -0
  15. huggingface_hub/cli/jobs.py +522 -0
  16. huggingface_hub/cli/lfs.py +198 -0
  17. huggingface_hub/cli/repo.py +243 -0
  18. huggingface_hub/cli/repo_files.py +128 -0
  19. huggingface_hub/cli/system.py +52 -0
  20. huggingface_hub/cli/upload.py +316 -0
  21. huggingface_hub/cli/upload_large_folder.py +132 -0
  22. huggingface_hub/commands/_cli_utils.py +5 -0
  23. huggingface_hub/commands/delete_cache.py +3 -1
  24. huggingface_hub/commands/download.py +4 -0
  25. huggingface_hub/commands/env.py +3 -0
  26. huggingface_hub/commands/huggingface_cli.py +2 -0
  27. huggingface_hub/commands/repo.py +4 -0
  28. huggingface_hub/commands/repo_files.py +4 -0
  29. huggingface_hub/commands/scan_cache.py +3 -1
  30. huggingface_hub/commands/tag.py +3 -1
  31. huggingface_hub/commands/upload.py +4 -0
  32. huggingface_hub/commands/upload_large_folder.py +3 -1
  33. huggingface_hub/commands/user.py +11 -1
  34. huggingface_hub/commands/version.py +3 -0
  35. huggingface_hub/constants.py +1 -0
  36. huggingface_hub/file_download.py +16 -5
  37. huggingface_hub/hf_api.py +519 -7
  38. huggingface_hub/hf_file_system.py +8 -16
  39. huggingface_hub/hub_mixin.py +3 -3
  40. huggingface_hub/inference/_client.py +38 -39
  41. huggingface_hub/inference/_common.py +38 -11
  42. huggingface_hub/inference/_generated/_async_client.py +50 -51
  43. huggingface_hub/inference/_generated/types/__init__.py +1 -0
  44. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  45. huggingface_hub/inference/_mcp/cli.py +36 -18
  46. huggingface_hub/inference/_mcp/constants.py +8 -0
  47. huggingface_hub/inference/_mcp/types.py +3 -0
  48. huggingface_hub/inference/_providers/__init__.py +4 -1
  49. huggingface_hub/inference/_providers/_common.py +3 -6
  50. huggingface_hub/inference/_providers/fal_ai.py +85 -42
  51. huggingface_hub/inference/_providers/hf_inference.py +17 -9
  52. huggingface_hub/inference/_providers/replicate.py +19 -1
  53. huggingface_hub/keras_mixin.py +2 -2
  54. huggingface_hub/repocard.py +1 -1
  55. huggingface_hub/repository.py +2 -2
  56. huggingface_hub/utils/_auth.py +1 -1
  57. huggingface_hub/utils/_cache_manager.py +2 -2
  58. huggingface_hub/utils/_dotenv.py +51 -0
  59. huggingface_hub/utils/_headers.py +1 -1
  60. huggingface_hub/utils/_runtime.py +1 -1
  61. huggingface_hub/utils/_xet.py +6 -2
  62. huggingface_hub/utils/_xet_progress_reporting.py +141 -0
  63. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/METADATA +7 -8
  64. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/RECORD +68 -51
  65. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/entry_points.txt +1 -0
  66. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/LICENSE +0 -0
  67. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/WHEEL +0 -0
  68. {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.34.0.dist-info}/top_level.txt +0 -0
huggingface_hub/_commit_api.py CHANGED
@@ -4,7 +4,6 @@ Type definitions and utilities for the `create_commit` API
 
 import base64
 import io
-import math
 import os
 import warnings
 from collections import defaultdict
@@ -23,6 +22,7 @@ from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
 from .utils import (
     FORBIDDEN_FOLDERS,
     XetTokenType,
+    are_progress_bars_disabled,
     chunk_iterable,
     fetch_xet_connection_info_from_repo_info,
     get_session,
@@ -33,7 +33,6 @@ from .utils import (
     validate_hf_hub_args,
 )
 from .utils import tqdm as hf_tqdm
-from .utils.tqdm import _get_progress_bar_context
 
 
 if TYPE_CHECKING:
@@ -529,9 +528,12 @@ def _upload_xet_files(
     """
     if len(additions) == 0:
         return
+
     # at this point, we know that hf_xet is installed
     from hf_xet import upload_bytes, upload_files
 
+    from .utils._xet_progress_reporting import XetProgressReporter
+
     try:
         xet_connection_info = fetch_xet_connection_info_from_repo_info(
             token_type=XetTokenType.WRITE,
@@ -567,32 +569,18 @@ def _upload_xet_files(
             raise XetRefreshTokenError("Failed to refresh xet token")
         return new_xet_connection.access_token, new_xet_connection.expiration_unix_epoch
 
-    num_chunks = math.ceil(len(additions) / UPLOAD_BATCH_MAX_NUM_FILES)
-    num_chunks_num_digits = int(math.log10(num_chunks)) + 1
-    for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
-        _chunk = [op for op in chunk]
-
-        bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
-        paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
-        expected_size = sum(op.upload_info.size for op in bytes_ops + paths_ops)
+    if not are_progress_bars_disabled():
+        progress = XetProgressReporter()
+        progress_callback = progress.update_progress
+    else:
+        progress, progress_callback = None, None
 
-        if num_chunks > 1:
-            description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
-        else:
-            description = "Uploading..."
-        progress_cm = _get_progress_bar_context(
-            desc=description,
-            total=expected_size,
-            initial=0,
-            unit="B",
-            unit_scale=True,
-            name="huggingface_hub.xet_put",
-            log_level=logger.getEffectiveLevel(),
-        )
-        with progress_cm as progress:
+    try:
+        for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
+            _chunk = [op for op in chunk]
 
-            def update_progress(increment: int):
-                progress.update(increment)
+            bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
+            paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
 
             if len(paths_ops) > 0:
                 upload_files(
@@ -600,7 +588,7 @@ def _upload_xet_files(
                     xet_endpoint,
                     access_token_info,
                     token_refresher,
-                    update_progress,
+                    progress_callback,
                     repo_type,
                 )
             if len(bytes_ops) > 0:
@@ -609,9 +597,14 @@ def _upload_xet_files(
                     xet_endpoint,
                     access_token_info,
                     token_refresher,
-                    update_progress,
+                    progress_callback,
                     repo_type,
                 )
+
+    finally:
+        if progress is not None:
+            progress.close(False)
+
     return
 
 
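The new flow replaces the per-batch tqdm bars with a single `XetProgressReporter` shared across all batches, and guarantees cleanup via `try`/`finally`. A minimal sketch of that gating pattern, using a stand-in reporter class (only `are_progress_bars_disabled` is the real helper here):

```python
from huggingface_hub.utils import are_progress_bars_disabled

class StubReporter:
    """Stand-in for XetProgressReporter, for illustration only."""
    def update_progress(self, update):
        print("progress:", update)
    def close(self, success: bool):
        print("reporter closed")

# Build the reporter only when progress bars are globally enabled;
# otherwise pass None so the uploader stays silent.
progress = None if are_progress_bars_disabled() else StubReporter()
progress_callback = progress.update_progress if progress is not None else None
try:
    pass  # ... upload each batch, forwarding `progress_callback` ...
finally:
    if progress is not None:
        progress.close(False)  # mirrors the diff: always closed, even on error
```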
huggingface_hub/_jobs_api.py ADDED
@@ -0,0 +1,145 @@
+# coding=utf-8
+# Copyright 2025-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+from huggingface_hub import constants
+from huggingface_hub._space_api import SpaceHardware
+from huggingface_hub.utils._datetime import parse_datetime
+
+
+class JobStage(str, Enum):
+    """
+    Enumeration of the possible stages of a Job on the Hub.
+
+    Values can be compared to strings:
+    ```py
+    assert JobStage.COMPLETED == "COMPLETED"
+    ```
+
+    Taken from https://github.com/huggingface/moon-landing/blob/main/server/job_types/JobInfo.ts#L61 (private url).
+    """
+
+    # Copied from moon-landing > server > lib > Job.ts
+    COMPLETED = "COMPLETED"
+    CANCELED = "CANCELED"
+    ERROR = "ERROR"
+    DELETED = "DELETED"
+    RUNNING = "RUNNING"
+
+
+@dataclass
+class JobStatus:
+    stage: JobStage
+    message: Optional[str]
+
+    def __init__(self, **kwargs) -> None:
+        self.stage = kwargs["stage"]
+        self.message = kwargs.get("message")
+
+
+@dataclass
+class JobOwner:
+    id: str
+    name: str
+
+
+@dataclass
+class JobInfo:
+    """
+    Contains information about a Job.
+
+    Args:
+        id (`str`):
+            Job ID.
+        created_at (`datetime` or `None`):
+            When the Job was created.
+        docker_image (`str` or `None`):
+            The Docker image from Docker Hub used for the Job.
+            Can be None if space_id is present instead.
+        space_id (`str` or `None`):
+            The Docker image from Hugging Face Spaces used for the Job.
+            Can be None if docker_image is present instead.
+        command (`List[str]` or `None`):
+            Command of the Job, e.g. `["python", "-c", "print('hello world')"]`
+        arguments (`List[str]` or `None`):
+            Arguments passed to the command.
+        environment (`Dict[str, Any]` or `None`):
+            Environment variables of the Job as a dictionary.
+        secrets (`Dict[str, Any]` or `None`):
+            Secret environment variables of the Job (encrypted).
+        flavor (`str` or `None`):
+            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
+            E.g. `"cpu-basic"`.
+        status (`JobStatus` or `None`):
+            Status of the Job, e.g. `JobStatus(stage="RUNNING", message=None)`.
+            See [`JobStage`] for possible stage values.
+        owner (`JobOwner` or `None`):
+            Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq")`.
+
+    Example:
+
+    ```python
+    >>> from huggingface_hub import run_job
+    >>> job = run_job(
+    ...     image="python:3.12",
+    ...     command=["python", "-c", "print('Hello from the cloud!')"]
+    ... )
+    >>> job
+    JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
+    >>> job.id
+    '687fb701029421ae5549d998'
+    >>> job.url
+    'https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998'
+    >>> job.status.stage
+    'RUNNING'
+    ```
+    """
+
+    id: str
+    created_at: Optional[datetime]
+    docker_image: Optional[str]
+    space_id: Optional[str]
+    command: Optional[List[str]]
+    arguments: Optional[List[str]]
+    environment: Optional[Dict[str, Any]]
+    secrets: Optional[Dict[str, Any]]
+    flavor: Optional[SpaceHardware]
+    status: Optional[JobStatus]
+    owner: Optional[JobOwner]
+
+    # Inferred fields
+    endpoint: str
+    url: str
+
+    def __init__(self, **kwargs) -> None:
+        self.id = kwargs["id"]
+        created_at = kwargs.get("createdAt") or kwargs.get("created_at")
+        self.created_at = parse_datetime(created_at) if created_at else None
+        self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
+        self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
+        self.owner = JobOwner(**(kwargs["owner"] if isinstance(kwargs.get("owner"), dict) else {}))
+        self.command = kwargs.get("command")
+        self.arguments = kwargs.get("arguments")
+        self.environment = kwargs.get("environment")
+        self.secrets = kwargs.get("secrets")
+        self.flavor = kwargs.get("flavor")
+        self.status = JobStatus(**(kwargs["status"] if isinstance(kwargs.get("status"), dict) else {}))
+
+        # Inferred fields
+        self.endpoint = kwargs.get("endpoint", constants.ENDPOINT)
+        self.url = f"{self.endpoint}/jobs/{self.owner.name}/{self.id}"
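`JobInfo.__init__` accepts both the raw camelCase payload returned by the Jobs API and snake_case keys. A hedged illustration, constructing the dataclass directly from a made-up payload (in practice you would get one back from `run_job`):

```python
from huggingface_hub._jobs_api import JobInfo, JobStage

# Hypothetical raw server payload (camelCase keys, values made up).
payload = {
    "id": "687fb701029421ae5549d998",
    "createdAt": "2025-07-22T16:06:25.079Z",
    "dockerImage": "python:3.12",
    "command": ["python", "-c", "print('hi')"],
    "status": {"stage": "RUNNING"},
    "owner": {"id": "5e9ecfc04957053f60648a3e", "name": "lhoestq"},
}

job = JobInfo(**payload)
assert job.created_at is not None            # parsed from "createdAt"
assert job.status.stage == JobStage.RUNNING  # JobStage is a str Enum...
assert job.status.stage == "RUNNING"         # ...so plain strings compare too
print(job.url)  # https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998
```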
huggingface_hub/_local_folder.py CHANGED
@@ -86,7 +86,13 @@ class LocalDownloadFilePaths:
 
     def incomplete_path(self, etag: str) -> Path:
         """Return the path where a file will be temporarily downloaded before being moved to `file_path`."""
-        return self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete"
+        path = self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete"
+        resolved_path = str(path.resolve())
+        # Some Windows versions do not allow for paths longer than 255 characters.
+        # In this case, we must specify it as an extended path by using the "\\?\" prefix.
+        if len(resolved_path) > 255 and not resolved_path.startswith("\\\\?\\"):
+            path = Path("\\\\?\\" + resolved_path)
+        return path
 
 
 @dataclass(frozen=True)
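The extended-length prefix logic is self-contained and easy to sanity-check outside the class; a small sketch (the helper name is illustrative, not part of the library):

```python
from pathlib import Path

def extend_if_too_long(path: Path) -> Path:
    """Illustrative copy of the incomplete_path() fix: prefix paths longer
    than 255 characters with '\\\\?\\' so Windows APIs accept them."""
    resolved = str(path.resolve())
    if len(resolved) > 255 and not resolved.startswith("\\\\?\\"):
        return Path("\\\\?\\" + resolved)
    return path

short = Path("C:/cache/file.incomplete")
long = Path("C:/cache") / ("a" * 300 + ".incomplete")
print(extend_if_too_long(short))  # unchanged
print(extend_if_too_long(long))   # \\?\C:\cache\aaa... (on Windows)
```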
huggingface_hub/_login.py CHANGED
@@ -75,7 +75,7 @@ def login(
         components. If `token` is not provided, it will be prompted to the user either with
         a widget (in a notebook) or via the terminal.
 
-    To log in from outside of a script, one can also use `huggingface-cli login` which is
+    To log in from outside of a script, one can also use `hf auth login` which is
     a cli command that wraps [`login`].
 
     <Tip>
@@ -120,7 +120,7 @@ def login(
         logger.info(
             "The token has not been saved to the git credentials helper. Pass "
             "`add_to_git_credential=True` in this function directly or "
-            "`--add-to-git-credential` if using via `huggingface-cli` if "
+            "`--add-to-git-credential` if using via `hf` CLI if "
             "you want to set the git credential as well."
         )
     _login(token, add_to_git_credential=add_to_git_credential)
@@ -233,7 +233,7 @@ def auth_list() -> None:
         )
     elif current_token_name is None:
         logger.warning(
-            "\nNote: No active token is set and no environment variable `HF_TOKEN` is found. Use `huggingface-cli login` to log in."
+            "\nNote: No active token is set and no environment variable `HF_TOKEN` is found. Use `hf auth login` to log in."
         )
 
 
@@ -273,8 +273,8 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = Fals
     print(_HF_LOGO_ASCII)
     if get_token() is not None:
         logger.info(
-            " A token is already saved on your machine. Run `huggingface-cli"
-            " whoami` to get more information or `huggingface-cli logout` if you want"
+            " A token is already saved on your machine. Run `hf auth whoami`"
+            " to get more information or `hf auth logout` if you want"
             " to log out."
         )
         logger.info(" Setting a new token will erase the existing one.")
huggingface_hub/_oauth.py CHANGED
@@ -6,7 +6,7 @@ import time
 import urllib.parse
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
 
 from . import constants
 from .hf_api import whoami
@@ -39,10 +39,8 @@ class OAuthOrgInfo:
             Whether the org has a payment method set up. Hugging Face field.
         role_in_org (`Optional[str]`, *optional*):
             The user's role in the org. Hugging Face field.
-        pending_sso (`Optional[bool]`, *optional*):
-            Indicates if the user granted the OAuth app access to the org but didn't complete SSO. Hugging Face field.
-        missing_mfa (`Optional[bool]`, *optional*):
-            Indicates if the user granted the OAuth app access to the org but didn't complete MFA. Hugging Face field.
+        security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
+            Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
     """
 
     sub: str
@@ -52,8 +50,7 @@ class OAuthOrgInfo:
     is_enterprise: bool
     can_pay: Optional[bool] = None
     role_in_org: Optional[str] = None
-    pending_sso: Optional[bool] = None
-    missing_mfa: Optional[bool] = None
+    security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None
 
 
 @dataclass
@@ -221,8 +218,7 @@ def parse_huggingface_oauth(request: "fastapi.Request") -> Optional[OAuthInfo]:
             is_enterprise=org.get("isEnterprise"),
             can_pay=org.get("canPay"),
             role_in_org=org.get("roleInOrg"),
-            pending_sso=org.get("pendingSSO"),
-            missing_mfa=org.get("missingMFA"),
+            security_restrictions=org.get("securityRestrictions"),
         )
         for org in orgs_data
     ]
@@ -415,7 +411,7 @@ def _get_mocked_oauth_info() -> Dict:
     if token is None:
         raise ValueError(
             "Your machine must be logged in to HF to debug an OAuth app locally. Please"
-            " run `huggingface-cli login` or set `HF_TOKEN` as environment variable "
+            " run `hf auth login` or set `HF_TOKEN` as environment variable "
            "with one of your access token. You can generate a new token in your "
            "settings page (https://huggingface.co/settings/tokens)."
        )
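Consumers that previously read the `pending_sso` / `missing_mfa` booleans now get a single list of unmet requirements. A migration sketch using a stand-in object in place of a parsed `OAuthOrgInfo`:

```python
from types import SimpleNamespace

# Stand-in for an OAuthOrgInfo parsed by parse_huggingface_oauth().
org = SimpleNamespace(security_restrictions=["sso", "mfa"])

# Before: `if org.pending_sso or org.missing_mfa: ...`
# After: check membership; None or [] means every requirement is met.
restrictions = org.security_restrictions or []
needs_sso = "sso" in restrictions
needs_mfa = "mfa" in restrictions
print(needs_sso, needs_mfa)  # True True
```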
huggingface_hub/_snapshot_download.py CHANGED
@@ -254,14 +254,19 @@ def snapshot_download(
     # At this stage, internet connection is up and running
     # => let's download the files!
     assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
-    assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."
 
     # Corner case: on very large repos, the siblings list in `repo_info` might not contain all files.
     # In that case, we need to use the `list_repo_tree` method to prevent caching issues.
-    repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings]
-    has_many_files = len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
-    if has_many_files:
-        logger.info("The repo has more than 50,000 files. Using `list_repo_tree` to ensure all files are listed.")
+    repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings] if repo_info.siblings is not None else []
+    unreliable_nb_files = (
+        repo_info.siblings is None
+        or len(repo_info.siblings) == 0
+        or len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
+    )
+    if unreliable_nb_files:
+        logger.info(
+            "Number of files in the repo is unreliable. Using `list_repo_tree` to ensure all files are listed."
+        )
         repo_files = (
             f.rfilename
             for f in api.list_repo_tree(repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type)
@@ -274,7 +279,7 @@ def snapshot_download(
         ignore_patterns=ignore_patterns,
     )
 
-    if not has_many_files:
+    if not unreliable_nb_files:
         filtered_repo_files = list(filtered_repo_files)
         tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
     else:
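The fallback now triggers in three situations instead of one. A compact restatement of the predicate as a standalone function (the function name is illustrative; the threshold matches the 50,000-file limit mentioned in the removed log message):

```python
from typing import Optional, Sequence

VERY_LARGE_REPO_THRESHOLD = 50_000  # same limit the removed log message referred to

def siblings_list_is_unreliable(siblings: Optional[Sequence]) -> bool:
    """Fall back to list_repo_tree when the siblings list is missing, empty,
    or possibly truncated (very large repos)."""
    return siblings is None or len(siblings) == 0 or len(siblings) > VERY_LARGE_REPO_THRESHOLD

assert siblings_list_is_unreliable(None)             # server returned no list
assert siblings_list_is_unreliable([])               # empty list is suspicious
assert not siblings_list_is_unreliable(["f"] * 100)  # small list: trust it
```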
huggingface_hub/_upload_large_folder.py CHANGED
@@ -33,6 +33,7 @@ from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_lo
 from .constants import DEFAULT_REVISION, REPO_TYPES
 from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
 from .utils._cache_manager import _format_size
+from .utils._runtime import is_xet_available
 from .utils.sha import sha_fileobj
 
 
@@ -45,6 +46,9 @@ WAITING_TIME_IF_NO_TASKS = 10  # seconds
 MAX_NB_FILES_FETCH_UPLOAD_MODE = 100
 COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
 
+UPLOAD_BATCH_SIZE_XET = 256  # Max 256 files per upload batch for XET-enabled repos
+UPLOAD_BATCH_SIZE_LFS = 1  # Otherwise, batches of 1 for regular LFS upload
+
 
 def upload_large_folder_internal(
     api: "HfApi",
@@ -93,6 +97,17 @@ def upload_large_folder_internal(
         repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
         logger.info(f"Repo created: {repo_url}")
         repo_id = repo_url.repo_id
+    # 2.1 Check if xet is enabled to set batch file upload size
+    is_xet_enabled = (
+        is_xet_available()
+        and api.repo_info(
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            expand="xetEnabled",
+        ).xet_enabled
+    )
+    upload_batch_size = UPLOAD_BATCH_SIZE_XET if is_xet_enabled else UPLOAD_BATCH_SIZE_LFS
 
     # 3. List files to upload
     filtered_paths_list = filter_repo_objects(
@@ -110,7 +125,7 @@ def upload_large_folder_internal(
     ]
 
     # 4. Start workers
-    status = LargeUploadStatus(items)
+    status = LargeUploadStatus(items, upload_batch_size)
     threads = [
         threading.Thread(
             target=_worker_job,
@@ -168,7 +183,7 @@ JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
 class LargeUploadStatus:
     """Contains information, queues and tasks for a large upload process."""
 
-    def __init__(self, items: List[JOB_ITEM_T]):
+    def __init__(self, items: List[JOB_ITEM_T], upload_batch_size: int = 1):
         self.items = items
         self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
         self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
@@ -179,6 +194,7 @@ class LargeUploadStatus:
         self.nb_workers_sha256: int = 0
         self.nb_workers_get_upload_mode: int = 0
         self.nb_workers_preupload_lfs: int = 0
+        self.upload_batch_size: int = upload_batch_size
         self.nb_workers_commit: int = 0
         self.nb_workers_waiting: int = 0
         self.last_commit_attempt: Optional[float] = None
@@ -353,16 +369,17 @@ def _worker_job(
                 status.nb_workers_get_upload_mode -= 1
 
         elif job == WorkerJob.PREUPLOAD_LFS:
-            item = items[0]  # single item
             try:
-                _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
-                status.queue_commit.put(item)
+                _preupload_lfs(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                for item in items:
+                    status.queue_commit.put(item)
             except KeyboardInterrupt:
                 raise
             except Exception as e:
                 logger.error(f"Failed to preupload LFS: {e}")
                 traceback.format_exc()
-                status.queue_preupload_lfs.put(item)
+                for item in items:
+                    status.queue_preupload_lfs.put(item)
 
             with status.lock:
                 status.nb_workers_preupload_lfs -= 1
@@ -417,11 +434,11 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug(f"Job: get upload mode (>{MAX_NB_FILES_FETCH_UPLOAD_MODE} files ready)")
         return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-    # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
-    elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+    # 4. Preupload LFS file if at least `status.upload_batch_size` files and no worker is preuploading LFS
+    elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and status.nb_workers_preupload_lfs == 0:
         status.nb_workers_preupload_lfs += 1
         logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
-        return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+        return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
 
     # 5. Compute sha256 if at least 1 file and no worker is computing sha256
     elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
@@ -435,14 +452,14 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug("Job: get upload mode (no other worker getting upload mode)")
         return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-    # 7. Preupload LFS file if at least 1 file
+    # 7. Preupload LFS file if at least `status.upload_batch_size` files
     # Skip if hf_transfer is enabled and there is already a worker preuploading LFS
-    elif status.queue_preupload_lfs.qsize() > 0 and (
+    elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and (
         status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
     ):
         status.nb_workers_preupload_lfs += 1
         logger.debug("Job: preupload LFS")
-        return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+        return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
 
     # 8. Compute sha256 if at least 1 file
     elif status.queue_sha256.qsize() > 0:
@@ -456,7 +473,13 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug("Job: get upload mode")
         return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-    # 10. Commit if at least 1 file and 1 min since last commit attempt
+    # 10. Preupload LFS file if at least 1 file
+    elif status.queue_preupload_lfs.qsize() > 0:
+        status.nb_workers_preupload_lfs += 1
+        logger.debug("Job: preupload LFS")
+        return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
+
+    # 11. Commit if at least 1 file and 1 min since last commit attempt
     elif (
         status.nb_workers_commit == 0
         and status.queue_commit.qsize() > 0
@@ -467,7 +490,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug("Job: commit (1 min since last commit attempt)")
         return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
 
-    # 11. Commit if at least 1 file all other queues are empty and all workers are waiting
+    # 12. Commit if at least 1 file all other queues are empty and all workers are waiting
     # e.g. when it's the last commit
     elif (
         status.nb_workers_commit == 0
@@ -483,12 +506,12 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug("Job: commit")
         return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
 
-    # 12. If all queues are empty, exit
+    # 13. If all queues are empty, exit
     elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
         logger.info("All files have been processed! Exiting worker.")
        return None
 
-    # 13. If no task is available, wait
+    # 14. If no task is available, wait
     else:
         status.nb_workers_waiting += 1
         logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
@@ -531,19 +554,19 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t
         metadata.save(paths)
 
 
-def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
-    """Preupload LFS file and update metadata."""
-    paths, metadata = item
-    addition = _build_hacky_operation(item)
+def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Preupload LFS files and update metadata."""
+    additions = [_build_hacky_operation(item) for item in items]
     api.preupload_lfs_files(
         repo_id=repo_id,
         repo_type=repo_type,
        revision=revision,
-        additions=[addition],
+        additions=additions,
    )
 
-    metadata.is_uploaded = True
-    metadata.save(paths)
+    for paths, metadata in items:
+        metadata.is_uploaded = True
+        metadata.save(paths)
 
 
 def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
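The scheduler now drains preupload batches with `_get_n(...)` everywhere `_get_one(...)` was used; `_get_n` itself is defined elsewhere in the module. A minimal sketch of what such a helper looks like (the actual implementation may differ):

```python
import queue
from typing import List, TypeVar

T = TypeVar("T")

def _get_n(q: "queue.Queue[T]", n: int) -> List[T]:
    """Pop up to n items from the queue without blocking (illustrative)."""
    return [q.get() for _ in range(min(q.qsize(), n))]

q: "queue.Queue[int]" = queue.Queue()
for i in range(10):
    q.put(i)
print(_get_n(q, 4))    # [0, 1, 2, 3]  -> one batch of size 4
print(_get_n(q, 256))  # [4, ..., 9]   -> remaining items in a single batch
```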
huggingface_hub/cli/__init__.py ADDED
@@ -0,0 +1,27 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from argparse import _SubParsersAction
+
+
+class BaseHuggingfaceCLICommand(ABC):
+    @staticmethod
+    @abstractmethod
+    def register_subcommand(parser: _SubParsersAction):
+        raise NotImplementedError()
+
+    @abstractmethod
+    def run(self):
+        raise NotImplementedError()
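Concrete commands (`cli/download.py`, `cli/jobs.py`, ...) implement this contract. A hypothetical minimal subclass, wired into an argparse entry point, for illustration:

```python
from argparse import ArgumentParser, Namespace, _SubParsersAction

from huggingface_hub.cli import BaseHuggingfaceCLICommand

class HelloCommand(BaseHuggingfaceCLICommand):  # hypothetical example command
    @staticmethod
    def register_subcommand(parser: _SubParsersAction):
        sub = parser.add_parser("hello", help="Print a greeting")
        sub.add_argument("--name", default="world")
        sub.set_defaults(func=lambda args: HelloCommand(args))

    def __init__(self, args: Namespace):
        self._name = args.name

    def run(self):
        print(f"Hello, {self._name}!")

parser = ArgumentParser("hf")
HelloCommand.register_subcommand(parser.add_subparsers())
args = parser.parse_args(["hello", "--name", "hub"])
args.func(args).run()  # prints: Hello, hub!
```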
huggingface_hub/cli/_cli_utils.py ADDED
@@ -0,0 +1,69 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains a utility for good-looking prints."""
+
+import os
+from typing import List, Union
+
+
+class ANSI:
+    """
+    Helper for en.wikipedia.org/wiki/ANSI_escape_code
+    """
+
+    _bold = "\u001b[1m"
+    _gray = "\u001b[90m"
+    _red = "\u001b[31m"
+    _reset = "\u001b[0m"
+    _yellow = "\u001b[33m"
+
+    @classmethod
+    def bold(cls, s: str) -> str:
+        return cls._format(s, cls._bold)
+
+    @classmethod
+    def gray(cls, s: str) -> str:
+        return cls._format(s, cls._gray)
+
+    @classmethod
+    def red(cls, s: str) -> str:
+        return cls._format(s, cls._bold + cls._red)
+
+    @classmethod
+    def yellow(cls, s: str) -> str:
+        return cls._format(s, cls._yellow)
+
+    @classmethod
+    def _format(cls, s: str, code: str) -> str:
+        if os.environ.get("NO_COLOR"):
+            # See https://no-color.org/
+            return s
+        return f"{code}{s}{cls._reset}"
+
+
+def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
+    """
+    Inspired by:
+
+    - stackoverflow.com/a/8356620/593036
+    - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
+    """
+    col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
+    row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
+    lines = []
+    lines.append(row_format.format(*headers))
+    lines.append(row_format.format(*["-" * w for w in col_widths]))
+    for row in rows:
+        lines.append(row_format.format(*row))
+    return "\n".join(lines)
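A quick usage example of the two helpers above (setting the `NO_COLOR` environment variable disables the escape codes):

```python
from huggingface_hub.cli._cli_utils import ANSI, tabulate

rows = [["lhoestq/my-model", "model", 3], ["lhoestq/my-dataset", "dataset", 12]]
print(tabulate(rows, headers=["REPO ID", "TYPE", "FILES"]))
# Prints an aligned table: headers, a dashed separator row, then one line per row.

print(ANSI.red("error"), ANSI.yellow("warning"), ANSI.bold("done"))
```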