huggingface-hub 0.29.0rc2__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (153)
  1. huggingface_hub/__init__.py +160 -46
  2. huggingface_hub/_commit_api.py +277 -71
  3. huggingface_hub/_commit_scheduler.py +15 -15
  4. huggingface_hub/_inference_endpoints.py +33 -22
  5. huggingface_hub/_jobs_api.py +301 -0
  6. huggingface_hub/_local_folder.py +18 -3
  7. huggingface_hub/_login.py +31 -63
  8. huggingface_hub/_oauth.py +460 -0
  9. huggingface_hub/_snapshot_download.py +241 -81
  10. huggingface_hub/_space_api.py +18 -10
  11. huggingface_hub/_tensorboard_logger.py +15 -19
  12. huggingface_hub/_upload_large_folder.py +196 -76
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +15 -25
  15. huggingface_hub/{commands → cli}/__init__.py +1 -15
  16. huggingface_hub/cli/_cli_utils.py +173 -0
  17. huggingface_hub/cli/auth.py +147 -0
  18. huggingface_hub/cli/cache.py +841 -0
  19. huggingface_hub/cli/download.py +189 -0
  20. huggingface_hub/cli/hf.py +60 -0
  21. huggingface_hub/cli/inference_endpoints.py +377 -0
  22. huggingface_hub/cli/jobs.py +772 -0
  23. huggingface_hub/cli/lfs.py +175 -0
  24. huggingface_hub/cli/repo.py +315 -0
  25. huggingface_hub/cli/repo_files.py +94 -0
  26. huggingface_hub/{commands/env.py → cli/system.py} +10 -13
  27. huggingface_hub/cli/upload.py +294 -0
  28. huggingface_hub/cli/upload_large_folder.py +117 -0
  29. huggingface_hub/community.py +20 -12
  30. huggingface_hub/constants.py +83 -59
  31. huggingface_hub/dataclasses.py +609 -0
  32. huggingface_hub/errors.py +99 -30
  33. huggingface_hub/fastai_utils.py +30 -41
  34. huggingface_hub/file_download.py +606 -346
  35. huggingface_hub/hf_api.py +2445 -1132
  36. huggingface_hub/hf_file_system.py +269 -152
  37. huggingface_hub/hub_mixin.py +61 -66
  38. huggingface_hub/inference/_client.py +501 -630
  39. huggingface_hub/inference/_common.py +133 -121
  40. huggingface_hub/inference/_generated/_async_client.py +536 -722
  41. huggingface_hub/inference/_generated/types/__init__.py +6 -1
  42. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +5 -6
  43. huggingface_hub/inference/_generated/types/base.py +10 -7
  44. huggingface_hub/inference/_generated/types/chat_completion.py +77 -31
  45. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  46. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  47. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  48. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  49. huggingface_hub/inference/_generated/types/image_to_image.py +8 -2
  50. huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
  51. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  52. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  53. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  54. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
  55. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  56. huggingface_hub/inference/_generated/types/text_generation.py +11 -11
  57. huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
  58. huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
  59. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  60. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  61. huggingface_hub/inference/_generated/types/translation.py +2 -2
  62. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  65. huggingface_hub/inference/_mcp/__init__.py +0 -0
  66. huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
  67. huggingface_hub/inference/_mcp/agent.py +100 -0
  68. huggingface_hub/inference/_mcp/cli.py +247 -0
  69. huggingface_hub/inference/_mcp/constants.py +81 -0
  70. huggingface_hub/inference/_mcp/mcp_client.py +395 -0
  71. huggingface_hub/inference/_mcp/types.py +45 -0
  72. huggingface_hub/inference/_mcp/utils.py +128 -0
  73. huggingface_hub/inference/_providers/__init__.py +149 -20
  74. huggingface_hub/inference/_providers/_common.py +160 -37
  75. huggingface_hub/inference/_providers/black_forest_labs.py +12 -9
  76. huggingface_hub/inference/_providers/cerebras.py +6 -0
  77. huggingface_hub/inference/_providers/clarifai.py +13 -0
  78. huggingface_hub/inference/_providers/cohere.py +32 -0
  79. huggingface_hub/inference/_providers/fal_ai.py +231 -22
  80. huggingface_hub/inference/_providers/featherless_ai.py +38 -0
  81. huggingface_hub/inference/_providers/fireworks_ai.py +22 -1
  82. huggingface_hub/inference/_providers/groq.py +9 -0
  83. huggingface_hub/inference/_providers/hf_inference.py +143 -33
  84. huggingface_hub/inference/_providers/hyperbolic.py +9 -5
  85. huggingface_hub/inference/_providers/nebius.py +47 -5
  86. huggingface_hub/inference/_providers/novita.py +48 -5
  87. huggingface_hub/inference/_providers/nscale.py +44 -0
  88. huggingface_hub/inference/_providers/openai.py +25 -0
  89. huggingface_hub/inference/_providers/publicai.py +6 -0
  90. huggingface_hub/inference/_providers/replicate.py +46 -9
  91. huggingface_hub/inference/_providers/sambanova.py +37 -1
  92. huggingface_hub/inference/_providers/scaleway.py +28 -0
  93. huggingface_hub/inference/_providers/together.py +34 -5
  94. huggingface_hub/inference/_providers/wavespeed.py +138 -0
  95. huggingface_hub/inference/_providers/zai_org.py +17 -0
  96. huggingface_hub/lfs.py +33 -100
  97. huggingface_hub/repocard.py +34 -38
  98. huggingface_hub/repocard_data.py +79 -59
  99. huggingface_hub/serialization/__init__.py +0 -1
  100. huggingface_hub/serialization/_base.py +12 -15
  101. huggingface_hub/serialization/_dduf.py +8 -8
  102. huggingface_hub/serialization/_torch.py +69 -69
  103. huggingface_hub/utils/__init__.py +27 -8
  104. huggingface_hub/utils/_auth.py +7 -7
  105. huggingface_hub/utils/_cache_manager.py +92 -147
  106. huggingface_hub/utils/_chunk_utils.py +2 -3
  107. huggingface_hub/utils/_deprecation.py +1 -1
  108. huggingface_hub/utils/_dotenv.py +55 -0
  109. huggingface_hub/utils/_experimental.py +7 -5
  110. huggingface_hub/utils/_fixes.py +0 -10
  111. huggingface_hub/utils/_git_credential.py +5 -5
  112. huggingface_hub/utils/_headers.py +8 -30
  113. huggingface_hub/utils/_http.py +399 -237
  114. huggingface_hub/utils/_pagination.py +6 -6
  115. huggingface_hub/utils/_parsing.py +98 -0
  116. huggingface_hub/utils/_paths.py +5 -5
  117. huggingface_hub/utils/_runtime.py +74 -22
  118. huggingface_hub/utils/_safetensors.py +21 -21
  119. huggingface_hub/utils/_subprocess.py +13 -11
  120. huggingface_hub/utils/_telemetry.py +4 -4
  121. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
  122. huggingface_hub/utils/_typing.py +25 -5
  123. huggingface_hub/utils/_validators.py +55 -74
  124. huggingface_hub/utils/_verification.py +167 -0
  125. huggingface_hub/utils/_xet.py +235 -0
  126. huggingface_hub/utils/_xet_progress_reporting.py +162 -0
  127. huggingface_hub/utils/insecure_hashlib.py +3 -5
  128. huggingface_hub/utils/logging.py +8 -11
  129. huggingface_hub/utils/tqdm.py +33 -4
  130. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -82
  131. huggingface_hub-1.1.3.dist-info/RECORD +155 -0
  132. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
  133. huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
  134. huggingface_hub/commands/delete_cache.py +0 -428
  135. huggingface_hub/commands/download.py +0 -200
  136. huggingface_hub/commands/huggingface_cli.py +0 -61
  137. huggingface_hub/commands/lfs.py +0 -200
  138. huggingface_hub/commands/repo_files.py +0 -128
  139. huggingface_hub/commands/scan_cache.py +0 -181
  140. huggingface_hub/commands/tag.py +0 -159
  141. huggingface_hub/commands/upload.py +0 -299
  142. huggingface_hub/commands/upload_large_folder.py +0 -129
  143. huggingface_hub/commands/user.py +0 -304
  144. huggingface_hub/commands/version.py +0 -37
  145. huggingface_hub/inference_api.py +0 -217
  146. huggingface_hub/keras_mixin.py +0 -500
  147. huggingface_hub/repository.py +0 -1477
  148. huggingface_hub/serialization/_tensorflow.py +0 -95
  149. huggingface_hub/utils/_hf_folder.py +0 -68
  150. huggingface_hub-0.29.0rc2.dist-info/RECORD +0 -131
  151. huggingface_hub-0.29.0rc2.dist-info/entry_points.txt +0 -6
  152. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
  153. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
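Note on the removals near the end of this list: `repository.py` (the git-based `Repository` class), `keras_mixin.py`, `inference_api.py`, and `serialization/_tensorflow.py` are gone in 1.x, and the `commands/` package is reorganised into `cli/`. As a rough, hedged illustration (repo ids below are hypothetical), the HTTP-based `HfApi` methods that remain cover the common `Repository`-style workflows:

```python
from huggingface_hub import HfApi, hf_hub_download

api = HfApi()

# Upload a local folder in a single commit (replaces git-clone/push workflows
# that relied on the removed Repository class).
api.upload_folder(
    repo_id="user/my-model",        # hypothetical repo id
    folder_path="./checkpoints",
    repo_type="model",
    commit_message="Upload checkpoints",
)

# Download a single file through the cache instead of cloning the whole repo.
local_path = hf_hub_download(repo_id="user/my-model", filename="config.json")
print(local_path)
```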
huggingface_hub/_upload_large_folder.py

@@ -24,14 +24,14 @@ import traceback
  from datetime import datetime
  from pathlib import Path
  from threading import Lock
- from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+ from typing import TYPE_CHECKING, Any, Optional, Union
+ from urllib.parse import quote

- from . import constants
  from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes
  from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata
  from .constants import DEFAULT_REVISION, REPO_TYPES
- from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
- from .utils._cache_manager import _format_size
+ from .utils import DEFAULT_IGNORE_PATTERNS, _format_size, filter_repo_objects, tqdm
+ from .utils._runtime import is_xet_available
  from .utils.sha import sha_fileobj


@@ -41,8 +41,113 @@ if TYPE_CHECKING:
  logger = logging.getLogger(__name__)

  WAITING_TIME_IF_NO_TASKS = 10 # seconds
- MAX_NB_REGULAR_FILES_PER_COMMIT = 75
- MAX_NB_LFS_FILES_PER_COMMIT = 150
+ MAX_NB_FILES_FETCH_UPLOAD_MODE = 100
+ COMMIT_SIZE_SCALE: list[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
+
+ UPLOAD_BATCH_SIZE_XET = 256 # Max 256 files per upload batch for XET-enabled repos
+ UPLOAD_BATCH_SIZE_LFS = 1 # Otherwise, batches of 1 for regular LFS upload
+
+ # Repository limits (from https://huggingface.co/docs/hub/repositories-recommendations)
+ MAX_FILES_PER_REPO = 100_000 # Recommended maximum number of files per repository
+ MAX_FILES_PER_FOLDER = 10_000 # Recommended maximum number of files per folder
+ MAX_FILE_SIZE_GB = 50 # Hard limit for individual file size
+ RECOMMENDED_FILE_SIZE_GB = 20 # Recommended maximum for individual file size
+
+
+ def _validate_upload_limits(paths_list: list[LocalUploadFilePaths]) -> None:
+     """
+     Validate upload against repository limits and warn about potential issues.
+
+     Args:
+         paths_list: List of file paths to be uploaded
+
+     Warns about:
+         - Too many files in the repository (>100k)
+         - Too many entries (files or subdirectories) in a single folder (>10k)
+         - Files exceeding size limits (>20GB recommended, >50GB hard limit)
+     """
+     logger.info("Running validation checks on files to upload...")
+
+     # Check 1: Total file count
+     if len(paths_list) > MAX_FILES_PER_REPO:
+         logger.warning(
+             f"You are about to upload {len(paths_list):,} files. "
+             f"This exceeds the recommended limit of {MAX_FILES_PER_REPO:,} files per repository.\n"
+             f"Consider:\n"
+             f" - Splitting your data into multiple repositories\n"
+             f" - Using fewer, larger files (e.g., parquet files)\n"
+             f" - See: https://huggingface.co/docs/hub/repositories-recommendations"
+         )
+
+     # Check 2: Files and subdirectories per folder
+     # Track immediate children (files and subdirs) for each folder
+     from collections import defaultdict
+
+     entries_per_folder: dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()})
+
+     for paths in paths_list:
+         path = Path(paths.path_in_repo)
+         parts = path.parts
+
+         # Count this file in its immediate parent directory
+         parent = str(path.parent) if str(path.parent) != "." else "."
+         entries_per_folder[parent]["files"] += 1
+
+         # Track immediate subdirectories for each parent folder
+         # Walk through the path components to track parent-child relationships
+         for i, child in enumerate(parts[:-1]):
+             parent = "." if i == 0 else "/".join(parts[:i])
+             entries_per_folder[parent]["subdirs"].add(child)
+
+     # Check limits for each folder
+     for folder, data in entries_per_folder.items():
+         file_count = data["files"]
+         subdir_count = len(data["subdirs"])
+         total_entries = file_count + subdir_count
+
+         if total_entries > MAX_FILES_PER_FOLDER:
+             folder_display = "root" if folder == "." else folder
+             logger.warning(
+                 f"Folder '{folder_display}' contains {total_entries:,} entries "
+                 f"({file_count:,} files and {subdir_count:,} subdirectories). "
+                 f"This exceeds the recommended {MAX_FILES_PER_FOLDER:,} entries per folder.\n"
+                 "Consider reorganising into sub-folders."
+             )
+
+     # Check 3: File sizes
+     large_files = []
+     very_large_files = []
+
+     for paths in paths_list:
+         size = paths.file_path.stat().st_size
+         size_gb = size / 1_000_000_000 # Use decimal GB as per Hub limits
+
+         if size_gb > MAX_FILE_SIZE_GB:
+             very_large_files.append((paths.path_in_repo, size_gb))
+         elif size_gb > RECOMMENDED_FILE_SIZE_GB:
+             large_files.append((paths.path_in_repo, size_gb))
+
+     # Warn about very large files (>50GB)
+     if very_large_files:
+         files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in very_large_files[:5])
+         more_str = f"\n ... and {len(very_large_files) - 5} more files" if len(very_large_files) > 5 else ""
+         logger.warning(
+             f"Found {len(very_large_files)} files exceeding the {MAX_FILE_SIZE_GB}GB hard limit:\n"
+             f" - {files_str}{more_str}\n"
+             f"These files may fail to upload. Consider splitting them into smaller chunks."
+         )
+
+     # Warn about large files (>20GB)
+     if large_files:
+         files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in large_files[:5])
+         more_str = f"\n ... and {len(large_files) - 5} more files" if len(large_files) > 5 else ""
+         logger.warning(
+             f"Found {len(large_files)} files larger than {RECOMMENDED_FILE_SIZE_GB}GB (recommended limit):\n"
+             f" - {files_str}{more_str}\n"
+             f"Large files may slow down loading and processing."
+         )
+
+     logger.info("Validation checks complete.")


  def upload_large_folder_internal(
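The `_validate_upload_limits` helper added above only warns; nothing is blocked. A minimal sketch of the size classification it applies, using the same decimal-GB thresholds (toy code for illustration, not part of the library):

```python
# Toy re-implementation of the per-file size check shown in the diff above.
RECOMMENDED_FILE_SIZE_GB = 20  # recommended per-file maximum
MAX_FILE_SIZE_GB = 50          # hard per-file limit

def classify(size_bytes: int) -> str:
    size_gb = size_bytes / 1_000_000_000  # decimal GB, as in the diff
    if size_gb > MAX_FILE_SIZE_GB:
        return "exceeds hard limit (may fail to upload)"
    if size_gb > RECOMMENDED_FILE_SIZE_GB:
        return "above recommended size (warn)"
    return "ok"

print(classify(8_000_000_000))    # ok
print(classify(25_000_000_000))   # above recommended size (warn)
print(classify(60_000_000_000))   # exceeds hard limit (may fail to upload)
```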
@@ -53,8 +158,8 @@ def upload_large_folder_internal(
      repo_type: str, # Repo type is required!
      revision: Optional[str] = None,
      private: Optional[bool] = None,
-     allow_patterns: Optional[Union[List[str], str]] = None,
-     ignore_patterns: Optional[Union[List[str], str]] = None,
+     allow_patterns: Optional[Union[list[str], str]] = None,
+     ignore_patterns: Optional[Union[list[str], str]] = None,
      num_workers: Optional[int] = None,
      print_report: bool = True,
      print_report_every: int = 60,
@@ -92,6 +197,8 @@ def upload_large_folder_internal(
      repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
      logger.info(f"Repo created: {repo_url}")
      repo_id = repo_url.repo_id
+     # 2.1 Check if xet is enabled to set batch file upload size
+     upload_batch_size = UPLOAD_BATCH_SIZE_XET if is_xet_available() else UPLOAD_BATCH_SIZE_LFS

      # 3. List files to upload
      filtered_paths_list = filter_repo_objects(
@@ -102,6 +209,11 @@
      paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
      logger.info(f"Found {len(paths_list)} candidate files to upload")

+     # Validate upload against repository limits
+     _validate_upload_limits(paths_list)
+
+     logger.info("Starting upload...")
+
      # Read metadata for each file
      items = [
          (paths, read_upload_metadata(folder_path, paths.path_in_repo))
@@ -109,7 +221,7 @@
      ]

      # 4. Start workers
-     status = LargeUploadStatus(items)
+     status = LargeUploadStatus(items, upload_batch_size)
      threads = [
          threading.Thread(
              target=_worker_job,
@@ -161,13 +273,13 @@ class WorkerJob(enum.Enum):
      WAIT = enum.auto() # if no tasks are available but we don't want to exit


- JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
+ JOB_ITEM_T = tuple[LocalUploadFilePaths, LocalUploadFileMetadata]


  class LargeUploadStatus:
      """Contains information, queues and tasks for a large upload process."""

-     def __init__(self, items: List[JOB_ITEM_T]):
+     def __init__(self, items: list[JOB_ITEM_T], upload_batch_size: int = 1):
          self.items = items
          self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
          self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
@@ -178,11 +290,14 @@ class LargeUploadStatus:
          self.nb_workers_sha256: int = 0
          self.nb_workers_get_upload_mode: int = 0
          self.nb_workers_preupload_lfs: int = 0
+         self.upload_batch_size: int = upload_batch_size
          self.nb_workers_commit: int = 0
          self.nb_workers_waiting: int = 0
          self.last_commit_attempt: Optional[float] = None

          self._started_at = datetime.now()
+         self._chunk_idx: int = 1
+         self._chunk_lock: Lock = Lock()

          # Setup queues
          for item in self.items:
@@ -198,6 +313,21 @@ class LargeUploadStatus:
              else:
                  logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")

+     def target_chunk(self) -> int:
+         with self._chunk_lock:
+             return COMMIT_SIZE_SCALE[self._chunk_idx]
+
+     def update_chunk(self, success: bool, nb_items: int, duration: float) -> None:
+         with self._chunk_lock:
+             if not success:
+                 logger.warning(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.")
+                 self._chunk_idx -= 1
+             elif nb_items >= COMMIT_SIZE_SCALE[self._chunk_idx] and duration < 40:
+                 logger.info(f"Successfully committed {nb_items} at once. Increasing the limit for next batch.")
+                 self._chunk_idx += 1
+
+             self._chunk_idx = max(0, min(self._chunk_idx, len(COMMIT_SIZE_SCALE) - 1))
+
      def current_report(self) -> str:
          """Generate a report of the current status of the large upload."""
          nb_hashed = 0
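The `target_chunk`/`update_chunk` pair added above replaces the fixed 75-regular/150-LFS commit limits with an adaptive ladder over `COMMIT_SIZE_SCALE`: the batch grows one step after a fast, full-sized successful commit and shrinks one step after a failure. A standalone sketch of that index arithmetic (illustrative only, assuming the 40-second threshold shown in the diff):

```python
COMMIT_SIZE_SCALE = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]

class ChunkLadder:
    def __init__(self) -> None:
        self.idx = 1  # start at 50 files per commit, as in the diff

    def target(self) -> int:
        return COMMIT_SIZE_SCALE[self.idx]

    def update(self, success: bool, nb_items: int, duration: float) -> None:
        if not success:
            self.idx -= 1                      # back off after a failed commit
        elif nb_items >= COMMIT_SIZE_SCALE[self.idx] and duration < 40:
            self.idx += 1                      # grow after a fast, full batch
        self.idx = max(0, min(self.idx, len(COMMIT_SIZE_SCALE) - 1))

ladder = ChunkLadder()
ladder.update(success=True, nb_items=50, duration=12)    # 50 -> 75
ladder.update(success=True, nb_items=75, duration=20)    # 75 -> 100
ladder.update(success=False, nb_items=100, duration=90)  # 100 -> 75
print(ladder.target())  # 75
```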
@@ -282,7 +412,7 @@ def _worker_job(
      Read `upload_large_folder` docstring for more information on how tasks are prioritized.
      """
      while True:
-         next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None
+         next_job: Optional[tuple[WorkerJob, list[JOB_ITEM_T]]] = None

          # Determine next task
          next_job = _determine_next_job(status)
@@ -335,21 +465,24 @@
                  status.nb_workers_get_upload_mode -= 1

          elif job == WorkerJob.PREUPLOAD_LFS:
-             item = items[0] # single item
              try:
-                 _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
-                 status.queue_commit.put(item)
+                 _preupload_lfs(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                 for item in items:
+                     status.queue_commit.put(item)
              except KeyboardInterrupt:
                  raise
              except Exception as e:
                  logger.error(f"Failed to preupload LFS: {e}")
                  traceback.format_exc()
-                 status.queue_preupload_lfs.put(item)
+                 for item in items:
+                     status.queue_preupload_lfs.put(item)

              with status.lock:
                  status.nb_workers_preupload_lfs -= 1

          elif job == WorkerJob.COMMIT:
+             start_ts = time.time()
+             success = True
              try:
                  _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
              except KeyboardInterrupt:
@@ -359,6 +492,9 @@ def _worker_job(
                  traceback.format_exc()
                  for item in items:
                      status.queue_commit.put(item)
+                 success = False
+             duration = time.time() - start_ts
+             status.update_chunk(success, len(items), duration)
              with status.lock:
                  status.last_commit_attempt = time.time()
                  status.nb_workers_commit -= 1
@@ -369,7 +505,7 @@
                  status.nb_workers_waiting -= 1


- def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]:
+ def _determine_next_job(status: LargeUploadStatus) -> Optional[tuple[WorkerJob, list[JOB_ITEM_T]]]:
      with status.lock:
          # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file)
          if (
@@ -380,25 +516,25 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          ):
              status.nb_workers_commit += 1
              logger.debug("Job: commit (more than 5 minutes since last commit attempt)")
-             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+             return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))

          # 2. Commit if at least 100 files are ready to commit
          elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150:
              status.nb_workers_commit += 1
              logger.debug("Job: commit (>100 files ready)")
-             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+             return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))

-         # 3. Get upload mode if at least 10 files
-         elif status.queue_get_upload_mode.qsize() >= 10:
+         # 3. Get upload mode if at least 100 files
+         elif status.queue_get_upload_mode.qsize() >= MAX_NB_FILES_FETCH_UPLOAD_MODE:
              status.nb_workers_get_upload_mode += 1
-             logger.debug("Job: get upload mode (>10 files ready)")
-             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+             logger.debug(f"Job: get upload mode (>{MAX_NB_FILES_FETCH_UPLOAD_MODE} files ready)")
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))

-         # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
-         elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+         # 4. Preupload LFS file if at least `status.upload_batch_size` files and no worker is preuploading LFS
+         elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and status.nb_workers_preupload_lfs == 0:
              status.nb_workers_preupload_lfs += 1
              logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
-             return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+             return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))

          # 5. Compute sha256 if at least 1 file and no worker is computing sha256
          elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
@@ -410,16 +546,13 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
              status.nb_workers_get_upload_mode += 1
              logger.debug("Job: get upload mode (no other worker getting upload mode)")
-             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))

-         # 7. Preupload LFS file if at least 1 file
-         # Skip if hf_transfer is enabled and there is already a worker preuploading LFS
-         elif status.queue_preupload_lfs.qsize() > 0 and (
-             status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
-         ):
+         # 7. Preupload LFS file if at least `status.upload_batch_size` files
+         elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size:
              status.nb_workers_preupload_lfs += 1
              logger.debug("Job: preupload LFS")
-             return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+             return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))

          # 8. Compute sha256 if at least 1 file
          elif status.queue_sha256.qsize() > 0:
@@ -431,9 +564,15 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          elif status.queue_get_upload_mode.qsize() > 0:
              status.nb_workers_get_upload_mode += 1
              logger.debug("Job: get upload mode")
-             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))

-         # 10. Commit if at least 1 file and 1 min since last commit attempt
+         # 10. Preupload LFS file if at least 1 file
+         elif status.queue_preupload_lfs.qsize() > 0:
+             status.nb_workers_preupload_lfs += 1
+             logger.debug("Job: preupload LFS")
+             return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
+
+         # 11. Commit if at least 1 file and 1 min since last commit attempt
          elif (
              status.nb_workers_commit == 0
              and status.queue_commit.qsize() > 0
@@ -442,9 +581,9 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          ):
              status.nb_workers_commit += 1
              logger.debug("Job: commit (1 min since last commit attempt)")
-             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+             return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))

-         # 11. Commit if at least 1 file all other queues are empty and all workers are waiting
+         # 12. Commit if at least 1 file all other queues are empty and all workers are waiting
          # e.g. when it's the last commit
          elif (
              status.nb_workers_commit == 0
@@ -458,14 +597,14 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          ):
              status.nb_workers_commit += 1
              logger.debug("Job: commit")
-             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+             return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))

-         # 12. If all queues are empty, exit
+         # 13. If all queues are empty, exit
          elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
              logger.info("All files have been processed! Exiting worker.")
              return None

-         # 13. If no task is available, wait
+         # 14. If no task is available, wait
          else:
              status.nb_workers_waiting += 1
              logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
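Taken together, the renumbered branches above give `_determine_next_job` roughly this priority order: scheduled commits, large ready-to-commit batches, metadata fetches of `MAX_NB_FILES_FETCH_UPLOAD_MODE` files, batched preuploads, sha256 hashing, then fallback passes and a final commit before waiting. A much-simplified sketch of that ordering (it ignores worker counts and timers, so it is not the library's logic):

```python
# Simplified sketch of the new scheduling order, given only queue sizes.
def pick_job(commit: int, upload_mode: int, preupload: int, sha256: int, batch: int) -> str:
    if commit >= 150:
        return "commit"
    if upload_mode >= 100:
        return "get_upload_mode"
    if preupload >= batch:
        return "preupload_lfs"
    if sha256 > 0:
        return "sha256"
    if upload_mode > 0:
        return "get_upload_mode"
    if preupload > 0:
        return "preupload_lfs"
    if commit > 0:
        return "commit"
    return "wait"

print(pick_job(commit=0, upload_mode=3, preupload=300, sha256=0, batch=256))  # preupload_lfs
```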
@@ -486,7 +625,7 @@ def _compute_sha256(item: JOB_ITEM_T) -> None:
      metadata.save(paths)


- def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+ def _get_upload_mode(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
      """Get upload mode for each file and update metadata.

      Also receive info if the file should be ignored.
@@ -497,31 +636,33 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t
          repo_type=repo_type,
          repo_id=repo_id,
          headers=api._build_hf_headers(),
-         revision=revision,
+         revision=quote(revision, safe=""),
+         endpoint=api.endpoint,
      )
      for item, addition in zip(items, additions):
          paths, metadata = item
          metadata.upload_mode = addition._upload_mode
          metadata.should_ignore = addition._should_ignore
+         metadata.remote_oid = addition._remote_oid
          metadata.save(paths)


- def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
-     """Preupload LFS file and update metadata."""
-     paths, metadata = item
-     addition = _build_hacky_operation(item)
+ def _preupload_lfs(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+     """Preupload LFS files and update metadata."""
+     additions = [_build_hacky_operation(item) for item in items]
      api.preupload_lfs_files(
          repo_id=repo_id,
          repo_type=repo_type,
          revision=revision,
-         additions=[addition],
+         additions=additions,
      )

-     metadata.is_uploaded = True
-     metadata.save(paths)
+     for paths, metadata in items:
+         metadata.is_uploaded = True
+         metadata.save(paths)


- def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+ def _commit(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
      """Commit files to the repo."""
      additions = [_build_hacky_operation(item) for item in items]
      api.create_commit(
@@ -555,6 +696,9 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
      if metadata.sha256 is None:
          raise ValueError("sha256 must have been computed by now!")
      operation.upload_info = UploadInfo(sha256=bytes.fromhex(metadata.sha256), size=metadata.size, sample=sample)
+     operation._upload_mode = metadata.upload_mode # type: ignore[assignment]
+     operation._should_ignore = metadata.should_ignore
+     operation._remote_oid = metadata.remote_oid
      return operation


@@ -563,38 +707,14 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
  ####################


- def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+ def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> list[JOB_ITEM_T]:
      return [queue.get()]


- def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
+ def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> list[JOB_ITEM_T]:
      return [queue.get() for _ in range(min(queue.qsize(), n))]


- def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
-     """Special case for commit job: the number of items to commit depends on the type of files."""
-     # Can take at most 50 regular files and/or 100 LFS files in a single commit
-     items: List[JOB_ITEM_T] = []
-     nb_lfs, nb_regular = 0, 0
-     while True:
-         # If empty queue => commit everything
-         if queue.qsize() == 0:
-             return items
-
-         # If we have enough items => commit them
-         if nb_lfs >= MAX_NB_LFS_FILES_PER_COMMIT or nb_regular >= MAX_NB_REGULAR_FILES_PER_COMMIT:
-             return items
-
-         # Else, get a new item and increase counter
-         item = queue.get()
-         items.append(item)
-         _, metadata = item
-         if metadata.upload_mode == "lfs":
-             nb_lfs += 1
-         else:
-             nb_regular += 1
-
-
  def _print_overwrite(report: str) -> None:
      """Print a report, overwriting the previous lines.

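Everything above is internal machinery behind the public `HfApi.upload_large_folder` API; a typical call looks roughly like this (repo id hypothetical, and `repo_type` is required for this method):

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_large_folder(
    repo_id="user/large-dataset",   # hypothetical repo id
    repo_type="dataset",
    folder_path="./data",
    # num_workers=8,                # optional: defaults to a CPU-based heuristic
)
```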
huggingface_hub/_webhooks_payload.py

@@ -14,7 +14,7 @@
  # limitations under the License.
  """Contains data structures to parse the webhooks payload."""

- from typing import List, Literal, Optional
+ from typing import Literal, Optional

  from .utils import is_pydantic_available

@@ -116,7 +116,7 @@ class WebhookPayloadRepo(ObjectId):
      name: str
      private: bool
      subdomain: Optional[str] = None
-     tags: Optional[List[str]] = None
+     tags: Optional[list[str]] = None
      type: Literal["dataset", "model", "space"]
      url: WebhookPayloadUrl

@@ -134,4 +134,4 @@ class WebhookPayload(BaseModel):
      comment: Optional[WebhookPayloadComment] = None
      webhook: WebhookPayloadWebhook
      movedTo: Optional[WebhookPayloadMovedTo] = None
-     updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None
+     updatedRefs: Optional[list[WebhookPayloadUpdatedRef]] = None
huggingface_hub/_webhooks_server.py

@@ -18,7 +18,7 @@ import atexit
  import inspect
  import os
  from functools import wraps
- from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
+ from typing import TYPE_CHECKING, Any, Callable, Optional

  from .utils import experimental, is_fastapi_available, is_gradio_available

@@ -32,7 +32,7 @@ if is_fastapi_available():
      from fastapi.responses import JSONResponse
  else:
      # Will fail at runtime if FastAPI is not available
-     FastAPI = Request = JSONResponse = None # type: ignore [misc, assignment]
+     FastAPI = Request = JSONResponse = None # type: ignore


  _global_app: Optional["WebhooksServer"] = None
@@ -50,20 +50,14 @@ class WebhooksServer:
      It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
      model that contains all the information about the webhook event. The data will be parsed automatically for you.

-     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
      WebhooksServer and deploy it on a Space.

-     <Tip warning={true}>
+     > [!WARNING]
+     > `WebhooksServer` is experimental. Its API is subject to change in the future.

-     `WebhooksServer` is experimental. Its API is subject to change in the future.
-
-     </Tip>
-
-     <Tip warning={true}>
-
-     You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
-
-     </Tip>
+     > [!WARNING]
+     > You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).

      Args:
          ui (`gradio.Blocks`, optional):
@@ -115,7 +109,7 @@ class WebhooksServer:
          self._ui = ui

          self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
-         self.registered_webhooks: Dict[str, Callable] = {}
+         self.registered_webhooks: dict[str, Callable] = {}
          _warn_on_empty_secret(self.webhook_secret)

      def add_webhook(self, path: Optional[str] = None) -> Callable:
@@ -186,6 +180,8 @@ class WebhooksServer:
          # Print instructions and block main thread
          space_host = os.environ.get("SPACE_HOST")
          url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
+         if url is None:
+             raise ValueError("Cannot find the URL of the app. Please provide a valid `ui` or update `gradio` version.")
          url = url.strip("/")
          message = "\nWebhooks are correctly setup and ready to use:"
          message += "\n" + "\n".join(f" - POST {url}{webhook}" for webhook in self.registered_webhooks)
@@ -235,20 +231,14 @@ def webhook_endpoint(path: Optional[str] = None) -> Callable:
      you can use [`WebhooksServer`] directly. You can register multiple webhook endpoints (to the same server) by using
      this decorator multiple times.

-     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
      server and deploy it on a Space.

-     <Tip warning={true}>
-
-     `webhook_endpoint` is experimental. Its API is subject to change in the future.
-
-     </Tip>
-
-     <Tip warning={true}>
-
-     You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
+     > [!WARNING]
+     > `webhook_endpoint` is experimental. Its API is subject to change in the future.

-     </Tip>
+     > [!WARNING]
+     > You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).

      Args:
          path (`str`, optional):
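For context on the docstrings edited above, a minimal `webhook_endpoint` usage sketch (handler name and logic are illustrative only; running it requires `gradio`):

```python
from huggingface_hub import WebhookPayload, webhook_endpoint

@webhook_endpoint
async def trigger_training(payload: WebhookPayload) -> None:
    # Illustrative handler: react to updates on a dataset repo.
    if payload.repo is not None and payload.repo.type == "dataset" and payload.event.action == "update":
        print(f"Dataset {payload.repo.name} updated")
```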
huggingface_hub/{commands → cli}/__init__.py

@@ -1,4 +1,4 @@
- # Copyright 2020 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -11,17 +11,3 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
- from abc import ABC, abstractmethod
- from argparse import _SubParsersAction
-
-
- class BaseHuggingfaceCLICommand(ABC):
-     @staticmethod
-     @abstractmethod
-     def register_subcommand(parser: _SubParsersAction):
-         raise NotImplementedError()
-
-     @abstractmethod
-     def run(self):
-         raise NotImplementedError()