huggingface_hub-0.31.0rc0-py3-none-any.whl → huggingface_hub-1.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. huggingface_hub/__init__.py +145 -46
  2. huggingface_hub/_commit_api.py +168 -119
  3. huggingface_hub/_commit_scheduler.py +15 -15
  4. huggingface_hub/_inference_endpoints.py +15 -12
  5. huggingface_hub/_jobs_api.py +301 -0
  6. huggingface_hub/_local_folder.py +18 -3
  7. huggingface_hub/_login.py +31 -63
  8. huggingface_hub/_oauth.py +460 -0
  9. huggingface_hub/_snapshot_download.py +239 -80
  10. huggingface_hub/_space_api.py +5 -5
  11. huggingface_hub/_tensorboard_logger.py +15 -19
  12. huggingface_hub/_upload_large_folder.py +172 -76
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +13 -25
  15. huggingface_hub/{commands → cli}/__init__.py +1 -15
  16. huggingface_hub/cli/_cli_utils.py +173 -0
  17. huggingface_hub/cli/auth.py +147 -0
  18. huggingface_hub/cli/cache.py +841 -0
  19. huggingface_hub/cli/download.py +189 -0
  20. huggingface_hub/cli/hf.py +60 -0
  21. huggingface_hub/cli/inference_endpoints.py +377 -0
  22. huggingface_hub/cli/jobs.py +772 -0
  23. huggingface_hub/cli/lfs.py +175 -0
  24. huggingface_hub/cli/repo.py +315 -0
  25. huggingface_hub/cli/repo_files.py +94 -0
  26. huggingface_hub/{commands/env.py → cli/system.py} +10 -13
  27. huggingface_hub/cli/upload.py +294 -0
  28. huggingface_hub/cli/upload_large_folder.py +117 -0
  29. huggingface_hub/community.py +20 -12
  30. huggingface_hub/constants.py +38 -53
  31. huggingface_hub/dataclasses.py +609 -0
  32. huggingface_hub/errors.py +80 -30
  33. huggingface_hub/fastai_utils.py +30 -41
  34. huggingface_hub/file_download.py +435 -351
  35. huggingface_hub/hf_api.py +2050 -1124
  36. huggingface_hub/hf_file_system.py +269 -152
  37. huggingface_hub/hub_mixin.py +43 -63
  38. huggingface_hub/inference/_client.py +347 -434
  39. huggingface_hub/inference/_common.py +133 -121
  40. huggingface_hub/inference/_generated/_async_client.py +397 -541
  41. huggingface_hub/inference/_generated/types/__init__.py +5 -1
  42. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  43. huggingface_hub/inference/_generated/types/base.py +10 -7
  44. huggingface_hub/inference/_generated/types/chat_completion.py +59 -23
  45. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  46. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  47. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  48. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  49. huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
  50. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  51. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  52. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  53. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
  54. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  55. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  56. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  57. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  58. huggingface_hub/inference/_generated/types/translation.py +2 -2
  59. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  60. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  61. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  62. huggingface_hub/inference/_mcp/__init__.py +0 -0
  63. huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
  64. huggingface_hub/inference/_mcp/agent.py +100 -0
  65. huggingface_hub/inference/_mcp/cli.py +247 -0
  66. huggingface_hub/inference/_mcp/constants.py +81 -0
  67. huggingface_hub/inference/_mcp/mcp_client.py +395 -0
  68. huggingface_hub/inference/_mcp/types.py +45 -0
  69. huggingface_hub/inference/_mcp/utils.py +128 -0
  70. huggingface_hub/inference/_providers/__init__.py +82 -7
  71. huggingface_hub/inference/_providers/_common.py +129 -27
  72. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  73. huggingface_hub/inference/_providers/cerebras.py +1 -1
  74. huggingface_hub/inference/_providers/clarifai.py +13 -0
  75. huggingface_hub/inference/_providers/cohere.py +20 -3
  76. huggingface_hub/inference/_providers/fal_ai.py +183 -56
  77. huggingface_hub/inference/_providers/featherless_ai.py +38 -0
  78. huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
  79. huggingface_hub/inference/_providers/groq.py +9 -0
  80. huggingface_hub/inference/_providers/hf_inference.py +69 -30
  81. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  82. huggingface_hub/inference/_providers/nebius.py +33 -5
  83. huggingface_hub/inference/_providers/novita.py +5 -5
  84. huggingface_hub/inference/_providers/nscale.py +44 -0
  85. huggingface_hub/inference/_providers/openai.py +3 -1
  86. huggingface_hub/inference/_providers/publicai.py +6 -0
  87. huggingface_hub/inference/_providers/replicate.py +31 -13
  88. huggingface_hub/inference/_providers/sambanova.py +18 -4
  89. huggingface_hub/inference/_providers/scaleway.py +28 -0
  90. huggingface_hub/inference/_providers/together.py +20 -5
  91. huggingface_hub/inference/_providers/wavespeed.py +138 -0
  92. huggingface_hub/inference/_providers/zai_org.py +17 -0
  93. huggingface_hub/lfs.py +33 -100
  94. huggingface_hub/repocard.py +34 -38
  95. huggingface_hub/repocard_data.py +57 -57
  96. huggingface_hub/serialization/__init__.py +0 -1
  97. huggingface_hub/serialization/_base.py +12 -15
  98. huggingface_hub/serialization/_dduf.py +8 -8
  99. huggingface_hub/serialization/_torch.py +69 -69
  100. huggingface_hub/utils/__init__.py +19 -8
  101. huggingface_hub/utils/_auth.py +7 -7
  102. huggingface_hub/utils/_cache_manager.py +92 -147
  103. huggingface_hub/utils/_chunk_utils.py +2 -3
  104. huggingface_hub/utils/_deprecation.py +1 -1
  105. huggingface_hub/utils/_dotenv.py +55 -0
  106. huggingface_hub/utils/_experimental.py +7 -5
  107. huggingface_hub/utils/_fixes.py +0 -10
  108. huggingface_hub/utils/_git_credential.py +5 -5
  109. huggingface_hub/utils/_headers.py +8 -30
  110. huggingface_hub/utils/_http.py +398 -239
  111. huggingface_hub/utils/_pagination.py +4 -4
  112. huggingface_hub/utils/_parsing.py +98 -0
  113. huggingface_hub/utils/_paths.py +5 -5
  114. huggingface_hub/utils/_runtime.py +61 -24
  115. huggingface_hub/utils/_safetensors.py +21 -21
  116. huggingface_hub/utils/_subprocess.py +9 -9
  117. huggingface_hub/utils/_telemetry.py +4 -4
  118. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
  119. huggingface_hub/utils/_typing.py +25 -5
  120. huggingface_hub/utils/_validators.py +55 -74
  121. huggingface_hub/utils/_verification.py +167 -0
  122. huggingface_hub/utils/_xet.py +64 -17
  123. huggingface_hub/utils/_xet_progress_reporting.py +162 -0
  124. huggingface_hub/utils/insecure_hashlib.py +3 -5
  125. huggingface_hub/utils/logging.py +8 -11
  126. huggingface_hub/utils/tqdm.py +5 -4
  127. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -85
  128. huggingface_hub-1.1.3.dist-info/RECORD +155 -0
  129. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
  130. huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
  131. huggingface_hub/commands/delete_cache.py +0 -474
  132. huggingface_hub/commands/download.py +0 -200
  133. huggingface_hub/commands/huggingface_cli.py +0 -61
  134. huggingface_hub/commands/lfs.py +0 -200
  135. huggingface_hub/commands/repo_files.py +0 -128
  136. huggingface_hub/commands/scan_cache.py +0 -181
  137. huggingface_hub/commands/tag.py +0 -159
  138. huggingface_hub/commands/upload.py +0 -314
  139. huggingface_hub/commands/upload_large_folder.py +0 -129
  140. huggingface_hub/commands/user.py +0 -304
  141. huggingface_hub/commands/version.py +0 -37
  142. huggingface_hub/inference_api.py +0 -217
  143. huggingface_hub/keras_mixin.py +0 -500
  144. huggingface_hub/repository.py +0 -1477
  145. huggingface_hub/serialization/_tensorflow.py +0 -95
  146. huggingface_hub/utils/_hf_folder.py +0 -68
  147. huggingface_hub-0.31.0rc0.dist-info/RECORD +0 -135
  148. huggingface_hub-0.31.0rc0.dist-info/entry_points.txt +0 -6
  149. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
  150. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0

huggingface_hub/_upload_large_folder.py

@@ -24,15 +24,14 @@ import traceback
 from datetime import datetime
 from pathlib import Path
 from threading import Lock
-from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 from urllib.parse import quote
 
-from . import constants
 from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes
 from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata
 from .constants import DEFAULT_REVISION, REPO_TYPES
-from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
-from .utils._cache_manager import _format_size
+from .utils import DEFAULT_IGNORE_PATTERNS, _format_size, filter_repo_objects, tqdm
+from .utils._runtime import is_xet_available
 from .utils.sha import sha_fileobj
 
 
@@ -42,9 +41,113 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 WAITING_TIME_IF_NO_TASKS = 10  # seconds
-MAX_NB_REGULAR_FILES_PER_COMMIT = 75
-MAX_NB_LFS_FILES_PER_COMMIT = 150
-COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
+MAX_NB_FILES_FETCH_UPLOAD_MODE = 100
+COMMIT_SIZE_SCALE: list[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
+
+UPLOAD_BATCH_SIZE_XET = 256  # Max 256 files per upload batch for XET-enabled repos
+UPLOAD_BATCH_SIZE_LFS = 1  # Otherwise, batches of 1 for regular LFS upload
+
+# Repository limits (from https://huggingface.co/docs/hub/repositories-recommendations)
+MAX_FILES_PER_REPO = 100_000  # Recommended maximum number of files per repository
+MAX_FILES_PER_FOLDER = 10_000  # Recommended maximum number of files per folder
+MAX_FILE_SIZE_GB = 50  # Hard limit for individual file size
+RECOMMENDED_FILE_SIZE_GB = 20  # Recommended maximum for individual file size
+
+
+def _validate_upload_limits(paths_list: list[LocalUploadFilePaths]) -> None:
+    """
+    Validate upload against repository limits and warn about potential issues.
+
+    Args:
+        paths_list: List of file paths to be uploaded
+
+    Warns about:
+        - Too many files in the repository (>100k)
+        - Too many entries (files or subdirectories) in a single folder (>10k)
+        - Files exceeding size limits (>20GB recommended, >50GB hard limit)
+    """
+    logger.info("Running validation checks on files to upload...")
+
+    # Check 1: Total file count
+    if len(paths_list) > MAX_FILES_PER_REPO:
+        logger.warning(
+            f"You are about to upload {len(paths_list):,} files. "
+            f"This exceeds the recommended limit of {MAX_FILES_PER_REPO:,} files per repository.\n"
+            f"Consider:\n"
+            f"  - Splitting your data into multiple repositories\n"
+            f"  - Using fewer, larger files (e.g., parquet files)\n"
+            f"  - See: https://huggingface.co/docs/hub/repositories-recommendations"
+        )
+
+    # Check 2: Files and subdirectories per folder
+    # Track immediate children (files and subdirs) for each folder
+    from collections import defaultdict
+
+    entries_per_folder: dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()})
+
+    for paths in paths_list:
+        path = Path(paths.path_in_repo)
+        parts = path.parts
+
+        # Count this file in its immediate parent directory
+        parent = str(path.parent) if str(path.parent) != "." else "."
+        entries_per_folder[parent]["files"] += 1
+
+        # Track immediate subdirectories for each parent folder
+        # Walk through the path components to track parent-child relationships
+        for i, child in enumerate(parts[:-1]):
+            parent = "." if i == 0 else "/".join(parts[:i])
+            entries_per_folder[parent]["subdirs"].add(child)
+
+    # Check limits for each folder
+    for folder, data in entries_per_folder.items():
+        file_count = data["files"]
+        subdir_count = len(data["subdirs"])
+        total_entries = file_count + subdir_count
+
+        if total_entries > MAX_FILES_PER_FOLDER:
+            folder_display = "root" if folder == "." else folder
+            logger.warning(
+                f"Folder '{folder_display}' contains {total_entries:,} entries "
+                f"({file_count:,} files and {subdir_count:,} subdirectories). "
+                f"This exceeds the recommended {MAX_FILES_PER_FOLDER:,} entries per folder.\n"
+                "Consider reorganising into sub-folders."
+            )
+
+    # Check 3: File sizes
+    large_files = []
+    very_large_files = []
+
+    for paths in paths_list:
+        size = paths.file_path.stat().st_size
+        size_gb = size / 1_000_000_000  # Use decimal GB as per Hub limits
+
+        if size_gb > MAX_FILE_SIZE_GB:
+            very_large_files.append((paths.path_in_repo, size_gb))
+        elif size_gb > RECOMMENDED_FILE_SIZE_GB:
+            large_files.append((paths.path_in_repo, size_gb))
+
+    # Warn about very large files (>50GB)
+    if very_large_files:
+        files_str = "\n  - ".join(f"{path}: {size:.1f}GB" for path, size in very_large_files[:5])
+        more_str = f"\n  ... and {len(very_large_files) - 5} more files" if len(very_large_files) > 5 else ""
+        logger.warning(
+            f"Found {len(very_large_files)} files exceeding the {MAX_FILE_SIZE_GB}GB hard limit:\n"
+            f"  - {files_str}{more_str}\n"
+            f"These files may fail to upload. Consider splitting them into smaller chunks."
+        )
+
+    # Warn about large files (>20GB)
+    if large_files:
+        files_str = "\n  - ".join(f"{path}: {size:.1f}GB" for path, size in large_files[:5])
+        more_str = f"\n  ... and {len(large_files) - 5} more files" if len(large_files) > 5 else ""
+        logger.warning(
+            f"Found {len(large_files)} files larger than {RECOMMENDED_FILE_SIZE_GB}GB (recommended limit):\n"
+            f"  - {files_str}{more_str}\n"
+            f"Large files may slow down loading and processing."
+        )
+
+    logger.info("Validation checks complete.")
 
 
 def upload_large_folder_internal(
@@ -55,8 +158,8 @@ def upload_large_folder_internal(
     repo_type: str,  # Repo type is required!
     revision: Optional[str] = None,
     private: Optional[bool] = None,
-    allow_patterns: Optional[Union[List[str], str]] = None,
-    ignore_patterns: Optional[Union[List[str], str]] = None,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
     num_workers: Optional[int] = None,
     print_report: bool = True,
     print_report_every: int = 60,
@@ -94,6 +197,8 @@ def upload_large_folder_internal(
     repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
     logger.info(f"Repo created: {repo_url}")
     repo_id = repo_url.repo_id
+    # 2.1 Check if xet is enabled to set batch file upload size
+    upload_batch_size = UPLOAD_BATCH_SIZE_XET if is_xet_available() else UPLOAD_BATCH_SIZE_LFS
 
     # 3. List files to upload
     filtered_paths_list = filter_repo_objects(
@@ -104,6 +209,11 @@ def upload_large_folder_internal(
     paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
     logger.info(f"Found {len(paths_list)} candidate files to upload")
 
+    # Validate upload against repository limits
+    _validate_upload_limits(paths_list)
+
+    logger.info("Starting upload...")
+
     # Read metadata for each file
     items = [
         (paths, read_upload_metadata(folder_path, paths.path_in_repo))
@@ -111,7 +221,7 @@ def upload_large_folder_internal(
     ]
 
     # 4. Start workers
-    status = LargeUploadStatus(items)
+    status = LargeUploadStatus(items, upload_batch_size)
     threads = [
         threading.Thread(
            target=_worker_job,
@@ -163,13 +273,13 @@ class WorkerJob(enum.Enum):
     WAIT = enum.auto()  # if no tasks are available but we don't want to exit
 
 
-JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
+JOB_ITEM_T = tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
 
 
 class LargeUploadStatus:
     """Contains information, queues and tasks for a large upload process."""
 
-    def __init__(self, items: List[JOB_ITEM_T]):
+    def __init__(self, items: list[JOB_ITEM_T], upload_batch_size: int = 1):
         self.items = items
         self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
         self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
@@ -180,6 +290,7 @@ class LargeUploadStatus:
         self.nb_workers_sha256: int = 0
         self.nb_workers_get_upload_mode: int = 0
         self.nb_workers_preupload_lfs: int = 0
+        self.upload_batch_size: int = upload_batch_size
         self.nb_workers_commit: int = 0
         self.nb_workers_waiting: int = 0
         self.last_commit_attempt: Optional[float] = None
@@ -301,7 +412,7 @@ def _worker_job(
     Read `upload_large_folder` docstring for more information on how tasks are prioritized.
     """
     while True:
-        next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None
+        next_job: Optional[tuple[WorkerJob, list[JOB_ITEM_T]]] = None
 
         # Determine next task
         next_job = _determine_next_job(status)
@@ -354,16 +465,17 @@ def _worker_job(
                 status.nb_workers_get_upload_mode -= 1
 
         elif job == WorkerJob.PREUPLOAD_LFS:
-            item = items[0]  # single item
             try:
-                _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
-                status.queue_commit.put(item)
+                _preupload_lfs(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                for item in items:
+                    status.queue_commit.put(item)
             except KeyboardInterrupt:
                 raise
             except Exception as e:
                 logger.error(f"Failed to preupload LFS: {e}")
                 traceback.format_exc()
-                status.queue_preupload_lfs.put(item)
+                for item in items:
+                    status.queue_preupload_lfs.put(item)
 
             with status.lock:
                 status.nb_workers_preupload_lfs -= 1
@@ -393,7 +505,7 @@ def _worker_job(
                 status.nb_workers_waiting -= 1
 
 
-def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]:
+def _determine_next_job(status: LargeUploadStatus) -> Optional[tuple[WorkerJob, list[JOB_ITEM_T]]]:
     with status.lock:
         # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file)
         if (
@@ -404,25 +516,25 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         ):
             status.nb_workers_commit += 1
             logger.debug("Job: commit (more than 5 minutes since last commit attempt)")
-            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+            return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
 
         # 2. Commit if at least 100 files are ready to commit
         elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150:
             status.nb_workers_commit += 1
             logger.debug("Job: commit (>100 files ready)")
-            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+            return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
 
-        # 3. Get upload mode if at least 10 files
-        elif status.queue_get_upload_mode.qsize() >= 10:
+        # 3. Get upload mode if at least 100 files
+        elif status.queue_get_upload_mode.qsize() >= MAX_NB_FILES_FETCH_UPLOAD_MODE:
             status.nb_workers_get_upload_mode += 1
-            logger.debug("Job: get upload mode (>10 files ready)")
-            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
+            logger.debug(f"Job: get upload mode (>{MAX_NB_FILES_FETCH_UPLOAD_MODE} files ready)")
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-        # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
-        elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+        # 4. Preupload LFS file if at least `status.upload_batch_size` files and no worker is preuploading LFS
+        elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and status.nb_workers_preupload_lfs == 0:
             status.nb_workers_preupload_lfs += 1
             logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
-            return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+            return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
 
         # 5. Compute sha256 if at least 1 file and no worker is computing sha256
        elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
@@ -434,16 +546,13 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
             status.nb_workers_get_upload_mode += 1
             logger.debug("Job: get upload mode (no other worker getting upload mode)")
-            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-        # 7. Preupload LFS file if at least 1 file
-        # Skip if hf_transfer is enabled and there is already a worker preuploading LFS
-        elif status.queue_preupload_lfs.qsize() > 0 and (
-            status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
-        ):
+        # 7. Preupload LFS file if at least `status.upload_batch_size` files
+        elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size:
             status.nb_workers_preupload_lfs += 1
             logger.debug("Job: preupload LFS")
-            return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+            return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
 
         # 8. Compute sha256 if at least 1 file
         elif status.queue_sha256.qsize() > 0:
@@ -455,9 +564,15 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         elif status.queue_get_upload_mode.qsize() > 0:
             status.nb_workers_get_upload_mode += 1
             logger.debug("Job: get upload mode")
-            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-        # 10. Commit if at least 1 file and 1 min since last commit attempt
+        # 10. Preupload LFS file if at least 1 file
+        elif status.queue_preupload_lfs.qsize() > 0:
+            status.nb_workers_preupload_lfs += 1
+            logger.debug("Job: preupload LFS")
+            return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
+
+        # 11. Commit if at least 1 file and 1 min since last commit attempt
         elif (
             status.nb_workers_commit == 0
             and status.queue_commit.qsize() > 0
@@ -466,9 +581,9 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         ):
             status.nb_workers_commit += 1
             logger.debug("Job: commit (1 min since last commit attempt)")
-            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+            return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
 
-        # 11. Commit if at least 1 file all other queues are empty and all workers are waiting
+        # 12. Commit if at least 1 file all other queues are empty and all workers are waiting
         # e.g. when it's the last commit
         elif (
             status.nb_workers_commit == 0
@@ -482,14 +597,14 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         ):
             status.nb_workers_commit += 1
             logger.debug("Job: commit")
-            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+            return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
 
-        # 12. If all queues are empty, exit
+        # 13. If all queues are empty, exit
         elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
             logger.info("All files have been processed! Exiting worker.")
             return None
 
-        # 13. If no task is available, wait
+        # 14. If no task is available, wait
         else:
             status.nb_workers_waiting += 1
             logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
@@ -510,7 +625,7 @@ def _compute_sha256(item: JOB_ITEM_T) -> None:
     metadata.save(paths)
 
 
-def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+def _get_upload_mode(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
     """Get upload mode for each file and update metadata.
 
     Also receive info if the file should be ignored.
@@ -522,30 +637,32 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t
         repo_id=repo_id,
         headers=api._build_hf_headers(),
         revision=quote(revision, safe=""),
+        endpoint=api.endpoint,
     )
     for item, addition in zip(items, additions):
         paths, metadata = item
         metadata.upload_mode = addition._upload_mode
         metadata.should_ignore = addition._should_ignore
+        metadata.remote_oid = addition._remote_oid
         metadata.save(paths)
 
 
-def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
-    """Preupload LFS file and update metadata."""
-    paths, metadata = item
-    addition = _build_hacky_operation(item)
+def _preupload_lfs(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Preupload LFS files and update metadata."""
+    additions = [_build_hacky_operation(item) for item in items]
     api.preupload_lfs_files(
         repo_id=repo_id,
         repo_type=repo_type,
         revision=revision,
-        additions=[addition],
+        additions=additions,
     )
 
-    metadata.is_uploaded = True
-    metadata.save(paths)
+    for paths, metadata in items:
+        metadata.is_uploaded = True
+        metadata.save(paths)
 
 
-def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+def _commit(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
     """Commit files to the repo."""
     additions = [_build_hacky_operation(item) for item in items]
     api.create_commit(
@@ -579,6 +696,9 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
     if metadata.sha256 is None:
         raise ValueError("sha256 must have been computed by now!")
     operation.upload_info = UploadInfo(sha256=bytes.fromhex(metadata.sha256), size=metadata.size, sample=sample)
+    operation._upload_mode = metadata.upload_mode  # type: ignore[assignment]
+    operation._should_ignore = metadata.should_ignore
+    operation._remote_oid = metadata.remote_oid
     return operation
 
 
@@ -587,38 +707,14 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
 ####################
 
 
-def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> list[JOB_ITEM_T]:
     return [queue.get()]
 
 
-def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
+def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> list[JOB_ITEM_T]:
     return [queue.get() for _ in range(min(queue.qsize(), n))]
 
 
-def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
-    """Special case for commit job: the number of items to commit depends on the type of files."""
-    # Can take at most 50 regular files and/or 100 LFS files in a single commit
-    items: List[JOB_ITEM_T] = []
-    nb_lfs, nb_regular = 0, 0
-    while True:
-        # If empty queue => commit everything
-        if queue.qsize() == 0:
-            return items
-
-        # If we have enough items => commit them
-        if nb_lfs >= MAX_NB_LFS_FILES_PER_COMMIT or nb_regular >= MAX_NB_REGULAR_FILES_PER_COMMIT:
-            return items
-
-        # Else, get a new item and increase counter
-        item = queue.get()
-        items.append(item)
-        _, metadata = item
-        if metadata.upload_mode == "lfs":
-            nb_lfs += 1
-        else:
-            nb_regular += 1
-
-
 def _print_overwrite(report: str) -> None:
     """Print a report, overwriting the previous lines.
 

huggingface_hub/_webhooks_payload.py

@@ -14,7 +14,7 @@
 # limitations under the License.
 """Contains data structures to parse the webhooks payload."""
 
-from typing import List, Literal, Optional
+from typing import Literal, Optional
 from .utils import is_pydantic_available
 
 
@@ -116,7 +116,7 @@ class WebhookPayloadRepo(ObjectId):
     name: str
     private: bool
     subdomain: Optional[str] = None
-    tags: Optional[List[str]] = None
+    tags: Optional[list[str]] = None
     type: Literal["dataset", "model", "space"]
     url: WebhookPayloadUrl
 
@@ -134,4 +134,4 @@ class WebhookPayload(BaseModel):
     comment: Optional[WebhookPayloadComment] = None
     webhook: WebhookPayloadWebhook
     movedTo: Optional[WebhookPayloadMovedTo] = None
-    updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None
+    updatedRefs: Optional[list[WebhookPayloadUpdatedRef]] = None

huggingface_hub/_webhooks_server.py

@@ -18,7 +18,7 @@ import atexit
 import inspect
 import os
 from functools import wraps
-from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
+from typing import TYPE_CHECKING, Any, Callable, Optional
 
 from .utils import experimental, is_fastapi_available, is_gradio_available
 
@@ -32,7 +32,7 @@ if is_fastapi_available():
     from fastapi.responses import JSONResponse
 else:
     # Will fail at runtime if FastAPI is not available
-    FastAPI = Request = JSONResponse = None  # type: ignore [misc, assignment]
+    FastAPI = Request = JSONResponse = None  # type: ignore
 
 
 _global_app: Optional["WebhooksServer"] = None
@@ -50,20 +50,14 @@ class WebhooksServer:
     It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
     model that contains all the information about the webhook event. The data will be parsed automatically for you.
 
-    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
     WebhooksServer and deploy it on a Space.
 
-    <Tip warning={true}>
+    > [!WARNING]
+    > `WebhooksServer` is experimental. Its API is subject to change in the future.
 
-    `WebhooksServer` is experimental. Its API is subject to change in the future.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
-
-    </Tip>
+    > [!WARNING]
+    > You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
 
     Args:
         ui (`gradio.Blocks`, optional):
@@ -115,7 +109,7 @@ class WebhooksServer:
         self._ui = ui
 
         self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
-        self.registered_webhooks: Dict[str, Callable] = {}
+        self.registered_webhooks: dict[str, Callable] = {}
         _warn_on_empty_secret(self.webhook_secret)
 
     def add_webhook(self, path: Optional[str] = None) -> Callable:
@@ -237,20 +231,14 @@ def webhook_endpoint(path: Optional[str] = None) -> Callable:
     you can use [`WebhooksServer`] directly. You can register multiple webhook endpoints (to the same server) by using
     this decorator multiple times.
 
-    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
     server and deploy it on a Space.
 
-    <Tip warning={true}>
-
-    `webhook_endpoint` is experimental. Its API is subject to change in the future.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
+    > [!WARNING]
+    > `webhook_endpoint` is experimental. Its API is subject to change in the future.
 
-    </Tip>
+    > [!WARNING]
+    > You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
 
     Args:
         path (`str`, optional):

huggingface_hub/{commands → cli}/__init__.py

@@ -1,4 +1,4 @@
-# Copyright 2020 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,17 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from abc import ABC, abstractmethod
-from argparse import _SubParsersAction
-
-
-class BaseHuggingfaceCLICommand(ABC):
-    @staticmethod
-    @abstractmethod
-    def register_subcommand(parser: _SubParsersAction):
-        raise NotImplementedError()
-
-    @abstractmethod
-    def run(self):
-        raise NotImplementedError()