huggingface-hub 0.35.0rc0__py3-none-any.whl → 0.35.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of huggingface-hub has been flagged as a potentially problematic release.

Files changed (50):
  1. huggingface_hub/__init__.py +19 -1
  2. huggingface_hub/_jobs_api.py +168 -12
  3. huggingface_hub/_local_folder.py +1 -1
  4. huggingface_hub/_oauth.py +5 -9
  5. huggingface_hub/_tensorboard_logger.py +9 -10
  6. huggingface_hub/_upload_large_folder.py +108 -1
  7. huggingface_hub/cli/auth.py +4 -1
  8. huggingface_hub/cli/cache.py +7 -9
  9. huggingface_hub/cli/hf.py +2 -5
  10. huggingface_hub/cli/jobs.py +591 -13
  11. huggingface_hub/cli/repo.py +10 -4
  12. huggingface_hub/commands/delete_cache.py +2 -2
  13. huggingface_hub/commands/scan_cache.py +1 -1
  14. huggingface_hub/dataclasses.py +3 -0
  15. huggingface_hub/file_download.py +12 -10
  16. huggingface_hub/hf_api.py +549 -95
  17. huggingface_hub/hf_file_system.py +4 -10
  18. huggingface_hub/hub_mixin.py +5 -3
  19. huggingface_hub/inference/_client.py +98 -181
  20. huggingface_hub/inference/_common.py +72 -70
  21. huggingface_hub/inference/_generated/_async_client.py +116 -201
  22. huggingface_hub/inference/_generated/types/chat_completion.py +2 -0
  23. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  24. huggingface_hub/inference/_mcp/cli.py +1 -1
  25. huggingface_hub/inference/_mcp/constants.py +1 -1
  26. huggingface_hub/inference/_mcp/mcp_client.py +28 -11
  27. huggingface_hub/inference/_mcp/types.py +3 -0
  28. huggingface_hub/inference/_mcp/utils.py +7 -3
  29. huggingface_hub/inference/_providers/__init__.py +13 -0
  30. huggingface_hub/inference/_providers/_common.py +29 -4
  31. huggingface_hub/inference/_providers/black_forest_labs.py +1 -1
  32. huggingface_hub/inference/_providers/fal_ai.py +33 -2
  33. huggingface_hub/inference/_providers/hf_inference.py +15 -7
  34. huggingface_hub/inference/_providers/publicai.py +6 -0
  35. huggingface_hub/inference/_providers/replicate.py +1 -1
  36. huggingface_hub/inference/_providers/scaleway.py +28 -0
  37. huggingface_hub/lfs.py +2 -4
  38. huggingface_hub/repocard.py +2 -1
  39. huggingface_hub/utils/_dotenv.py +24 -20
  40. huggingface_hub/utils/_git_credential.py +1 -1
  41. huggingface_hub/utils/_http.py +3 -5
  42. huggingface_hub/utils/_runtime.py +1 -0
  43. huggingface_hub/utils/_typing.py +24 -4
  44. huggingface_hub/utils/_xet_progress_reporting.py +31 -10
  45. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/METADATA +7 -4
  46. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/RECORD +50 -48
  47. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/LICENSE +0 -0
  48. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/WHEEL +0 -0
  49. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/entry_points.txt +0 -0
  50. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/top_level.txt +0 -0
huggingface_hub/__init__.py CHANGED
@@ -46,7 +46,7 @@ import sys
  from typing import TYPE_CHECKING


- __version__ = "0.35.0.rc0"
+ __version__ = "0.35.1"

  # Alphabetical order of definitions is ensured in tests
  # WARNING: any comment added in this dictionary definition will be lost when
@@ -182,6 +182,8 @@ _SUBMOD_ATTRS = {
  "create_inference_endpoint_from_catalog",
  "create_pull_request",
  "create_repo",
+ "create_scheduled_job",
+ "create_scheduled_uv_job",
  "create_tag",
  "create_webhook",
  "dataset_info",
@@ -192,6 +194,7 @@ _SUBMOD_ATTRS = {
  "delete_folder",
  "delete_inference_endpoint",
  "delete_repo",
+ "delete_scheduled_job",
  "delete_space_secret",
  "delete_space_storage",
  "delete_space_variable",
@@ -219,6 +222,7 @@ _SUBMOD_ATTRS = {
  "get_webhook",
  "grant_access",
  "inspect_job",
+ "inspect_scheduled_job",
  "list_accepted_access_requests",
  "list_collections",
  "list_datasets",
@@ -259,6 +263,7 @@ _SUBMOD_ATTRS = {
  "request_space_storage",
  "restart_space",
  "resume_inference_endpoint",
+ "resume_scheduled_job",
  "revision_exists",
  "run_as_future",
  "run_job",
@@ -267,6 +272,7 @@ _SUBMOD_ATTRS = {
  "set_space_sleep_time",
  "space_info",
  "super_squash_history",
+ "suspend_scheduled_job",
  "unlike",
  "update_collection_item",
  "update_collection_metadata",
@@ -828,6 +834,8 @@ __all__ = [
  "create_inference_endpoint_from_catalog",
  "create_pull_request",
  "create_repo",
+ "create_scheduled_job",
+ "create_scheduled_uv_job",
  "create_tag",
  "create_webhook",
  "dataset_info",
@@ -838,6 +846,7 @@ __all__ = [
  "delete_folder",
  "delete_inference_endpoint",
  "delete_repo",
+ "delete_scheduled_job",
  "delete_space_secret",
  "delete_space_storage",
  "delete_space_variable",
@@ -878,6 +887,7 @@ __all__ = [
  "hf_hub_download",
  "hf_hub_url",
  "inspect_job",
+ "inspect_scheduled_job",
  "interpreter_login",
  "list_accepted_access_requests",
  "list_collections",
@@ -933,6 +943,7 @@ __all__ = [
  "request_space_storage",
  "restart_space",
  "resume_inference_endpoint",
+ "resume_scheduled_job",
  "revision_exists",
  "run_as_future",
  "run_job",
@@ -949,6 +960,7 @@ __all__ = [
  "split_tf_state_dict_into_shards",
  "split_torch_state_dict_into_shards",
  "super_squash_history",
+ "suspend_scheduled_job",
  "try_to_load_from_cache",
  "unlike",
  "update_collection_item",
@@ -1190,6 +1202,8 @@ if TYPE_CHECKING: # pragma: no cover
  create_inference_endpoint_from_catalog, # noqa: F401
  create_pull_request, # noqa: F401
  create_repo, # noqa: F401
+ create_scheduled_job, # noqa: F401
+ create_scheduled_uv_job, # noqa: F401
  create_tag, # noqa: F401
  create_webhook, # noqa: F401
  dataset_info, # noqa: F401
@@ -1200,6 +1214,7 @@ if TYPE_CHECKING: # pragma: no cover
  delete_folder, # noqa: F401
  delete_inference_endpoint, # noqa: F401
  delete_repo, # noqa: F401
+ delete_scheduled_job, # noqa: F401
  delete_space_secret, # noqa: F401
  delete_space_storage, # noqa: F401
  delete_space_variable, # noqa: F401
@@ -1227,6 +1242,7 @@ if TYPE_CHECKING: # pragma: no cover
  get_webhook, # noqa: F401
  grant_access, # noqa: F401
  inspect_job, # noqa: F401
+ inspect_scheduled_job, # noqa: F401
  list_accepted_access_requests, # noqa: F401
  list_collections, # noqa: F401
  list_datasets, # noqa: F401
@@ -1267,6 +1283,7 @@ if TYPE_CHECKING: # pragma: no cover
  request_space_storage, # noqa: F401
  restart_space, # noqa: F401
  resume_inference_endpoint, # noqa: F401
+ resume_scheduled_job, # noqa: F401
  revision_exists, # noqa: F401
  run_as_future, # noqa: F401
  run_job, # noqa: F401
@@ -1275,6 +1292,7 @@ if TYPE_CHECKING: # pragma: no cover
  set_space_sleep_time, # noqa: F401
  space_info, # noqa: F401
  super_squash_history, # noqa: F401
+ suspend_scheduled_job, # noqa: F401
  unlike, # noqa: F401
  update_collection_item, # noqa: F401
  update_collection_metadata, # noqa: F401
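Note: the new exports above correspond to the scheduled-Jobs API added to `HfApi` in this release. A minimal usage sketch follows; it assumes a configured token with Jobs access, and since this hunk only shows the export names, the exact parameter names of the inspect/suspend/resume/delete helpers are an assumption (the id is passed positionally here).

```python
# Sketch only: exercises the scheduled-job helpers newly exported in 0.35.1.
# Assumes a valid HF token is configured and the account can use Hugging Face Jobs.
# Signatures of inspect/suspend/resume/delete are assumed (id passed positionally).
from huggingface_hub import (
    create_scheduled_job,
    delete_scheduled_job,
    inspect_scheduled_job,
    resume_scheduled_job,
    suspend_scheduled_job,
)

scheduled = create_scheduled_job(
    image="python:3.12",
    command=["python", "-c", "print('Hello from the cloud!')"],
    schedule="@hourly",  # or a cron expression such as "0 9 * * 1"
)

info = inspect_scheduled_job(scheduled.id)  # returns a ScheduledJobInfo
suspend_scheduled_job(scheduled.id)         # pause future runs
resume_scheduled_job(scheduled.id)          # resume them
delete_scheduled_job(scheduled.id)          # drop the schedule entirely
```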
huggingface_hub/_jobs_api.py CHANGED
@@ -15,7 +15,7 @@
  from dataclasses import dataclass
  from datetime import datetime
  from enum import Enum
- from typing import Any, Dict, List, Optional
+ from typing import Any, Dict, List, Optional, Union

  from huggingface_hub import constants
  from huggingface_hub._space_api import SpaceHardware
@@ -47,15 +47,12 @@ class JobStatus:
  stage: JobStage
  message: Optional[str]

- def __init__(self, **kwargs) -> None:
- self.stage = kwargs["stage"]
- self.message = kwargs.get("message")
-

  @dataclass
  class JobOwner:
  id: str
  name: str
+ type: str


  @dataclass
@@ -88,8 +85,8 @@ class JobInfo:
  status: (`JobStatus` or `None`):
  Status of the Job, e.g. `JobStatus(stage="RUNNING", message=None)`
  See [`JobStage`] for possible stage values.
- status: (`JobOwner` or `None`):
- Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq")`
+ owner: (`JobOwner` or `None`):
+ Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")`

  Example:

@@ -100,7 +97,7 @@ class JobInfo:
  ... command=["python", "-c", "print('Hello from the cloud!')"]
  ... )
  >>> job
- JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
+ JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq', type='user'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
  >>> job.id
  '687fb701029421ae5549d998'
  >>> job.url
@@ -119,8 +116,8 @@ class JobInfo:
  environment: Optional[Dict[str, Any]]
  secrets: Optional[Dict[str, Any]]
  flavor: Optional[SpaceHardware]
- status: Optional[JobStatus]
- owner: Optional[JobOwner]
+ status: JobStatus
+ owner: JobOwner

  # Inferred fields
  endpoint: str
@@ -132,14 +129,173 @@ class JobInfo:
  self.created_at = parse_datetime(created_at) if created_at else None
  self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
  self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
- self.owner = JobOwner(**(kwargs["owner"] if isinstance(kwargs.get("owner"), dict) else {}))
+ owner = kwargs.get("owner", {})
+ self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"])
  self.command = kwargs.get("command")
  self.arguments = kwargs.get("arguments")
  self.environment = kwargs.get("environment")
  self.secrets = kwargs.get("secrets")
  self.flavor = kwargs.get("flavor")
- self.status = JobStatus(**(kwargs["status"] if isinstance(kwargs.get("status"), dict) else {}))
+ status = kwargs.get("status", {})
+ self.status = JobStatus(stage=status["stage"], message=status.get("message"))

  # Inferred fields
  self.endpoint = kwargs.get("endpoint", constants.ENDPOINT)
  self.url = f"{self.endpoint}/jobs/{self.owner.name}/{self.id}"
+
+
+ @dataclass
+ class JobSpec:
+ docker_image: Optional[str]
+ space_id: Optional[str]
+ command: Optional[List[str]]
+ arguments: Optional[List[str]]
+ environment: Optional[Dict[str, Any]]
+ secrets: Optional[Dict[str, Any]]
+ flavor: Optional[SpaceHardware]
+ timeout: Optional[int]
+ tags: Optional[List[str]]
+ arch: Optional[str]
+
+ def __init__(self, **kwargs) -> None:
+ self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
+ self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
+ self.command = kwargs.get("command")
+ self.arguments = kwargs.get("arguments")
+ self.environment = kwargs.get("environment")
+ self.secrets = kwargs.get("secrets")
+ self.flavor = kwargs.get("flavor")
+ self.timeout = kwargs.get("timeout")
+ self.tags = kwargs.get("tags")
+ self.arch = kwargs.get("arch")
+
+
+ @dataclass
+ class LastJobInfo:
+ id: str
+ at: datetime
+
+ def __init__(self, **kwargs) -> None:
+ self.id = kwargs["id"]
+ self.at = parse_datetime(kwargs["at"])
+
+
+ @dataclass
+ class ScheduledJobStatus:
+ last_job: Optional[LastJobInfo]
+ next_job_run_at: Optional[datetime]
+
+ def __init__(self, **kwargs) -> None:
+ last_job = kwargs.get("lastJob") or kwargs.get("last_job")
+ self.last_job = LastJobInfo(**last_job) if last_job else None
+ next_job_run_at = kwargs.get("nextJobRunAt") or kwargs.get("next_job_run_at")
+ self.next_job_run_at = parse_datetime(str(next_job_run_at)) if next_job_run_at else None
+
+
+ @dataclass
+ class ScheduledJobInfo:
+ """
+ Contains information about a Job.
+
+ Args:
+ id (`str`):
+ Scheduled Job ID.
+ created_at (`datetime` or `None`):
+ When the scheduled Job was created.
+ tags (`List[str]` or `None`):
+ The tags of the scheduled Job.
+ schedule (`str` or `None`):
+ One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
+ CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).
+ suspend (`bool` or `None`):
+ Whether the scheduled job is suspended (paused).
+ concurrency (`bool` or `None`):
+ Whether multiple instances of this Job can run concurrently.
+ status (`ScheduledJobStatus` or `None`):
+ Status of the scheduled Job.
+ owner: (`JobOwner` or `None`):
+ Owner of the scheduled Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")`
+ job_spec: (`JobSpec` or `None`):
+ Specifications of the Job.
+
+ Example:
+
+ ```python
+ >>> from huggingface_hub import run_job
+ >>> scheduled_job = create_scheduled_job(
+ ... image="python:3.12",
+ ... command=["python", "-c", "print('Hello from the cloud!')"],
+ ... schedule="@hourly",
+ ... )
+ >>> scheduled_job.id
+ '687fb701029421ae5549d999'
+ >>> scheduled_job.status.next_job_run_at
+ datetime.datetime(2025, 7, 22, 17, 6, 25, 79000, tzinfo=datetime.timezone.utc)
+ ```
+ """
+
+ id: str
+ created_at: Optional[datetime]
+ job_spec: JobSpec
+ schedule: Optional[str]
+ suspend: Optional[bool]
+ concurrency: Optional[bool]
+ status: ScheduledJobStatus
+ owner: JobOwner
+
+ def __init__(self, **kwargs) -> None:
+ self.id = kwargs["id"]
+ created_at = kwargs.get("createdAt") or kwargs.get("created_at")
+ self.created_at = parse_datetime(created_at) if created_at else None
+ self.job_spec = JobSpec(**(kwargs.get("job_spec") or kwargs.get("jobSpec", {})))
+ self.schedule = kwargs.get("schedule")
+ self.suspend = kwargs.get("suspend")
+ self.concurrency = kwargs.get("concurrency")
+ status = kwargs.get("status", {})
+ self.status = ScheduledJobStatus(
+ last_job=status.get("last_job") or status.get("lastJob"),
+ next_job_run_at=status.get("next_job_run_at") or status.get("nextJobRunAt"),
+ )
+ owner = kwargs.get("owner", {})
+ self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"])
+
+
+ def _create_job_spec(
+ *,
+ image: str,
+ command: List[str],
+ env: Optional[Dict[str, Any]],
+ secrets: Optional[Dict[str, Any]],
+ flavor: Optional[SpaceHardware],
+ timeout: Optional[Union[int, float, str]],
+ ) -> Dict[str, Any]:
+ # prepare job spec to send to HF Jobs API
+ job_spec: Dict[str, Any] = {
+ "command": command,
+ "arguments": [],
+ "environment": env or {},
+ "flavor": flavor or SpaceHardware.CPU_BASIC,
+ }
+ # secrets are optional
+ if secrets:
+ job_spec["secrets"] = secrets
+ # timeout is optional
+ if timeout:
+ time_units_factors = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24}
+ if isinstance(timeout, str) and timeout[-1] in time_units_factors:
+ job_spec["timeoutSeconds"] = int(float(timeout[:-1]) * time_units_factors[timeout[-1]])
+ else:
+ job_spec["timeoutSeconds"] = int(timeout)
+ # input is either from docker hub or from HF spaces
+ for prefix in (
+ "https://huggingface.co/spaces/",
+ "https://hf.co/spaces/",
+ "huggingface.co/spaces/",
+ "hf.co/spaces/",
+ ):
+ if image.startswith(prefix):
+ job_spec["spaceId"] = image[len(prefix) :]
+ break
+ else:
+ job_spec["dockerImage"] = image
+ return job_spec
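Note: the timeout and image handling in the new `_create_job_spec` can be read in isolation. The sketch below replicates that logic standalone; `parse_timeout` and `resolve_image` are illustrative names, not library functions.

```python
# Standalone sketch of the timeout / image resolution logic shown in the hunk above.
# Helper names are hypothetical; only the behaviour mirrors _create_job_spec.
from typing import Union

_TIME_UNITS = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24}

def parse_timeout(timeout: Union[int, float, str]) -> int:
    """'300' -> 300, '5m' -> 300, '2h' -> 7200, 90.0 -> 90."""
    if isinstance(timeout, str) and timeout[-1] in _TIME_UNITS:
        return int(float(timeout[:-1]) * _TIME_UNITS[timeout[-1]])
    return int(timeout)

def resolve_image(image: str) -> dict:
    """A Space URL becomes `spaceId`; anything else is treated as a Docker image."""
    for prefix in (
        "https://huggingface.co/spaces/",
        "https://hf.co/spaces/",
        "huggingface.co/spaces/",
        "hf.co/spaces/",
    ):
        if image.startswith(prefix):
            return {"spaceId": image[len(prefix):]}
    return {"dockerImage": image}

assert parse_timeout("5m") == 300
assert resolve_image("hf.co/spaces/user/my-space") == {"spaceId": "user/my-space"}
assert resolve_image("python:3.12") == {"dockerImage": "python:3.12"}
```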
huggingface_hub/_local_folder.py CHANGED
@@ -90,7 +90,7 @@ class LocalDownloadFilePaths:
  resolved_path = str(path.resolve())
  # Some Windows versions do not allow for paths longer than 255 characters.
  # In this case, we must specify it as an extended path by using the "\\?\" prefix.
- if len(resolved_path) > 255 and not resolved_path.startswith("\\\\?\\"):
+ if os.name == "nt" and len(resolved_path) > 255 and not resolved_path.startswith("\\\\?\\"):
  path = Path("\\\\?\\" + resolved_path)
  return path

huggingface_hub/_oauth.py CHANGED
@@ -6,7 +6,7 @@ import time
  import urllib.parse
  import warnings
  from dataclasses import dataclass
- from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+ from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union

  from . import constants
  from .hf_api import whoami
@@ -39,10 +39,8 @@ class OAuthOrgInfo:
  Whether the org has a payment method set up. Hugging Face field.
  role_in_org (`Optional[str]`, *optional*):
  The user's role in the org. Hugging Face field.
- pending_sso (`Optional[bool]`, *optional*):
- Indicates if the user granted the OAuth app access to the org but didn't complete SSO. Hugging Face field.
- missing_mfa (`Optional[bool]`, *optional*):
- Indicates if the user granted the OAuth app access to the org but didn't complete MFA. Hugging Face field.
+ security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
+ Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
  """

  sub: str
@@ -52,8 +50,7 @@
  is_enterprise: bool
  can_pay: Optional[bool] = None
  role_in_org: Optional[str] = None
- pending_sso: Optional[bool] = None
- missing_mfa: Optional[bool] = None
+ security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None


  @dataclass
@@ -221,8 +218,7 @@ def parse_huggingface_oauth(request: "fastapi.Request") -> Optional[OAuthInfo]:
  is_enterprise=org.get("isEnterprise"),
  can_pay=org.get("canPay"),
  role_in_org=org.get("roleInOrg"),
- pending_sso=org.get("pendingSSO"),
- missing_mfa=org.get("missingMFA"),
+ security_restrictions=org.get("securityRestrictions"),
  )
  for org in orgs_data
  ]
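Note: consumers of `OAuthOrgInfo` that used to read `pending_sso` / `missing_mfa` now need to inspect `security_restrictions` instead. A hedged migration sketch, using a stand-in dataclass rather than a real OAuth response:

```python
# Sketch: checking org security restrictions with the new field.
# `Org` is a stand-in for an OAuthOrgInfo-like object, for illustration only.
from dataclasses import dataclass
from typing import List, Literal, Optional

@dataclass
class Org:
    security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None

def needs_sso(org: Org) -> bool:
    # Previously: `org.pending_sso`; SSO now shows up as a pending restriction.
    return "sso" in (org.security_restrictions or [])

def needs_mfa(org: Org) -> bool:
    # Previously: `org.missing_mfa`.
    return "mfa" in (org.security_restrictions or [])

assert needs_sso(Org(security_restrictions=["sso", "mfa"]))
assert not needs_mfa(Org())
```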
huggingface_hub/_tensorboard_logger.py CHANGED
@@ -14,7 +14,7 @@
  """Contains a logger to push training logs to the Hub, using Tensorboard."""

  from pathlib import Path
- from typing import TYPE_CHECKING, List, Optional, Union
+ from typing import List, Optional, Union

  from ._commit_scheduler import CommitScheduler
  from .errors import EntryNotFoundError
@@ -26,25 +26,24 @@ from .utils import experimental
  # or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
  # from either of them.
  try:
- from tensorboardX import SummaryWriter
+ from tensorboardX import SummaryWriter as _RuntimeSummaryWriter

  is_summary_writer_available = True
-
  except ImportError:
  try:
- from torch.utils.tensorboard import SummaryWriter
+ from torch.utils.tensorboard import SummaryWriter as _RuntimeSummaryWriter

- is_summary_writer_available = False
+ is_summary_writer_available = True
  except ImportError:
  # Dummy class to avoid failing at import. Will raise on instance creation.
- SummaryWriter = object
- is_summary_writer_available = False
+ class _DummySummaryWriter:
+ pass

- if TYPE_CHECKING:
- from tensorboardX import SummaryWriter
+ _RuntimeSummaryWriter = _DummySummaryWriter # type: ignore[assignment]
+ is_summary_writer_available = False


- class HFSummaryWriter(SummaryWriter):
+ class HFSummaryWriter(_RuntimeSummaryWriter):
  """
  Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.

huggingface_hub/_upload_large_folder.py CHANGED
@@ -24,7 +24,7 @@ import traceback
  from datetime import datetime
  from pathlib import Path
  from threading import Lock
- from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
  from urllib.parse import quote

  from . import constants
@@ -49,6 +49,108 @@ COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
  UPLOAD_BATCH_SIZE_XET = 256 # Max 256 files per upload batch for XET-enabled repos
  UPLOAD_BATCH_SIZE_LFS = 1 # Otherwise, batches of 1 for regular LFS upload

+ # Repository limits (from https://huggingface.co/docs/hub/repositories-recommendations)
+ MAX_FILES_PER_REPO = 100_000 # Recommended maximum number of files per repository
+ MAX_FILES_PER_FOLDER = 10_000 # Recommended maximum number of files per folder
+ MAX_FILE_SIZE_GB = 50 # Hard limit for individual file size
+ RECOMMENDED_FILE_SIZE_GB = 20 # Recommended maximum for individual file size
+
+
+ def _validate_upload_limits(paths_list: List[LocalUploadFilePaths]) -> None:
+ """
+ Validate upload against repository limits and warn about potential issues.
+
+ Args:
+ paths_list: List of file paths to be uploaded
+
+ Warns about:
+ - Too many files in the repository (>100k)
+ - Too many entries (files or subdirectories) in a single folder (>10k)
+ - Files exceeding size limits (>20GB recommended, >50GB hard limit)
+ """
+ logger.info("Running validation checks on files to upload...")
+
+ # Check 1: Total file count
+ if len(paths_list) > MAX_FILES_PER_REPO:
+ logger.warning(
+ f"You are about to upload {len(paths_list):,} files. "
+ f"This exceeds the recommended limit of {MAX_FILES_PER_REPO:,} files per repository.\n"
+ f"Consider:\n"
+ f" - Splitting your data into multiple repositories\n"
+ f" - Using fewer, larger files (e.g., parquet files)\n"
+ f" - See: https://huggingface.co/docs/hub/repositories-recommendations"
+ )
+
+ # Check 2: Files and subdirectories per folder
+ # Track immediate children (files and subdirs) for each folder
+ from collections import defaultdict
+
+ entries_per_folder: Dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()})
+
+ for paths in paths_list:
+ path = Path(paths.path_in_repo)
+ parts = path.parts
+
+ # Count this file in its immediate parent directory
+ parent = str(path.parent) if str(path.parent) != "." else "."
+ entries_per_folder[parent]["files"] += 1
+
+ # Track immediate subdirectories for each parent folder
+ # Walk through the path components to track parent-child relationships
+ for i, child in enumerate(parts[:-1]):
+ parent = "." if i == 0 else "/".join(parts[:i])
+ entries_per_folder[parent]["subdirs"].add(child)
+
+ # Check limits for each folder
+ for folder, data in entries_per_folder.items():
+ file_count = data["files"]
+ subdir_count = len(data["subdirs"])
+ total_entries = file_count + subdir_count
+
+ if total_entries > MAX_FILES_PER_FOLDER:
+ folder_display = "root" if folder == "." else folder
+ logger.warning(
+ f"Folder '{folder_display}' contains {total_entries:,} entries "
+ f"({file_count:,} files and {subdir_count:,} subdirectories). "
+ f"This exceeds the recommended {MAX_FILES_PER_FOLDER:,} entries per folder.\n"
+ "Consider reorganising into sub-folders."
+ )
+
+ # Check 3: File sizes
+ large_files = []
+ very_large_files = []
+
+ for paths in paths_list:
+ size = paths.file_path.stat().st_size
+ size_gb = size / 1_000_000_000 # Use decimal GB as per Hub limits
+
+ if size_gb > MAX_FILE_SIZE_GB:
+ very_large_files.append((paths.path_in_repo, size_gb))
+ elif size_gb > RECOMMENDED_FILE_SIZE_GB:
+ large_files.append((paths.path_in_repo, size_gb))
+
+ # Warn about very large files (>50GB)
+ if very_large_files:
+ files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in very_large_files[:5])
+ more_str = f"\n ... and {len(very_large_files) - 5} more files" if len(very_large_files) > 5 else ""
+ logger.warning(
+ f"Found {len(very_large_files)} files exceeding the {MAX_FILE_SIZE_GB}GB hard limit:\n"
+ f" - {files_str}{more_str}\n"
+ f"These files may fail to upload. Consider splitting them into smaller chunks."
+ )
+
+ # Warn about large files (>20GB)
+ if large_files:
+ files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in large_files[:5])
+ more_str = f"\n ... and {len(large_files) - 5} more files" if len(large_files) > 5 else ""
+ logger.warning(
+ f"Found {len(large_files)} files larger than {RECOMMENDED_FILE_SIZE_GB}GB (recommended limit):\n"
+ f" - {files_str}{more_str}\n"
+ f"Large files may slow down loading and processing."
+ )
+
+ logger.info("Validation checks complete.")
+

  def upload_large_folder_internal(
  api: "HfApi",
@@ -118,6 +220,11 @@ def upload_large_folder_internal(
  paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
  logger.info(f"Found {len(paths_list)} candidate files to upload")

+ # Validate upload against repository limits
+ _validate_upload_limits(paths_list)
+
+ logger.info("Starting upload...")
+
  # Read metadata for each file
  items = [
  (paths, read_upload_metadata(folder_path, paths.path_in_repo))
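Note: the per-folder check above counts direct files plus immediate subdirectories toward the recommended 10,000 entries. A standalone sketch of that accounting, using illustrative repo-relative paths:

```python
# Standalone sketch of the per-folder entry counting used by _validate_upload_limits.
# Paths below are illustrative "path_in_repo" values, not from a real upload.
from collections import defaultdict
from pathlib import PurePosixPath

paths_in_repo = [
    "data/train/shard-000.parquet",
    "data/train/shard-001.parquet",
    "data/valid/shard-000.parquet",
    "README.md",
]

entries = defaultdict(lambda: {"files": 0, "subdirs": set()})
for p in paths_in_repo:
    path = PurePosixPath(p)
    parts = path.parts
    parent = str(path.parent) if str(path.parent) != "." else "."
    entries[parent]["files"] += 1  # direct file in its parent folder
    for i, child in enumerate(parts[:-1]):  # immediate subdirs along the path
        entries["." if i == 0 else "/".join(parts[:i])]["subdirs"].add(child)

# Each folder's total = direct files + immediate subdirectories.
totals = {folder: d["files"] + len(d["subdirs"]) for folder, d in entries.items()}
assert totals["."] == 2           # README.md + the "data" subdir
assert totals["data"] == 2        # "train" + "valid" subdirs, no direct files
assert totals["data/train"] == 2  # two shards
```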
huggingface_hub/cli/auth.py CHANGED
@@ -62,6 +62,9 @@ class AuthCommands(BaseHuggingfaceCLICommand):
  auth_parser = parser.add_parser("auth", help="Manage authentication (login, logout, etc.).")
  auth_subparsers = auth_parser.add_subparsers(help="Authentication subcommands")

+ # Show help if no subcommand is provided
+ auth_parser.set_defaults(func=lambda args: auth_parser.print_help())
+
  # Add 'login' as a subcommand of 'auth'
  login_parser = auth_subparsers.add_parser(
  "login", help="Log in using a token from huggingface.co/settings/tokens"
@@ -197,7 +200,7 @@ class AuthWhoami(BaseAuthCommand):
  exit()
  try:
  info = self._api.whoami(token)
- print(info["name"])
+ print(ANSI.bold("user: "), info["name"])
  orgs = [org["name"] for org in info["orgs"]]
  if orgs:
  print(ANSI.bold("orgs: "), ",".join(orgs))
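Note: the `set_defaults(func=...)` fallback above (mirrored for `cache` in the next file) is what lets a bare `hf auth` print help instead of crashing; the `if service is not None` guard added in `cli/hf.py` below completes the pattern, since the fallback lambda returns no command object. A minimal argparse sketch of the same idea:

```python
# Minimal sketch of the argparse pattern used above: a parent subcommand
# that prints its own help when invoked without a nested subcommand.
import argparse

parser = argparse.ArgumentParser(prog="hf")
commands = parser.add_subparsers()

auth = commands.add_parser("auth", help="Manage authentication")
auth_sub = auth.add_subparsers()
auth.set_defaults(func=lambda args: auth.print_help())  # fallback when no subcommand

login = auth_sub.add_parser("login", help="Log in")
login.set_defaults(func=lambda args: print("logging in..."))

args = parser.parse_args(["auth"])  # no nested subcommand given
args.func(args)                     # prints the 'auth' help instead of raising
```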
huggingface_hub/cli/cache.py CHANGED
@@ -21,13 +21,7 @@ from functools import wraps
  from tempfile import mkstemp
  from typing import Any, Callable, Iterable, List, Literal, Optional, Union

- from ..utils import (
- CachedRepoInfo,
- CachedRevisionInfo,
- CacheNotFound,
- HFCacheInfo,
- scan_cache_dir,
- )
+ from ..utils import CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir
  from . import BaseHuggingfaceCLICommand
  from ._cli_utils import ANSI, tabulate

@@ -52,7 +46,7 @@ def require_inquirer_py(fn: Callable) -> Callable:
  if not _inquirer_py_available:
  raise ImportError(
  "The 'cache delete' command requires extra dependencies for the TUI.\n"
- "Please run 'pip install huggingface_hub[cli]' to install them.\n"
+ "Please run 'pip install \"huggingface_hub[cli]\"' to install them.\n"
  "Otherwise, disable TUI using the '--disable-tui' flag."
  )
  return fn(*args, **kwargs)
@@ -65,6 +59,10 @@ class CacheCommand(BaseHuggingfaceCLICommand):
  def register_subcommand(parser: _SubParsersAction):
  cache_parser = parser.add_parser("cache", help="Manage local cache directory.")
  cache_subparsers = cache_parser.add_subparsers(dest="cache_command", help="Cache subcommands")
+
+ # Show help if no subcommand is provided
+ cache_parser.set_defaults(func=lambda args: cache_parser.print_help())
+
  # Scan subcommand
  scan_parser = cache_subparsers.add_parser("scan", help="Scan cache directory.")
  scan_parser.add_argument(
@@ -145,7 +143,7 @@ class CacheCommand(BaseHuggingfaceCLICommand):
  if self.verbosity >= 3:
  print(ANSI.gray(message))
  for warning in hf_cache_info.warnings:
- print(ANSI.gray(warning))
+ print(ANSI.gray(str(warning)))
  else:
  print(ANSI.gray(message + " Use -vvv to print details."))

huggingface_hub/cli/hf.py CHANGED
@@ -47,10 +47,6 @@ def main():
  # LFS commands (hidden in --help)
  LfsCommands.register_subcommand(commands_parser)

- # Legacy commands
-
- # Experimental
-
  # Let's go
  args = parser.parse_args()
  if not hasattr(args, "func"):
@@ -59,7 +55,8 @@ def main():

  # Run
  service = args.func(args)
- service.run()
+ if service is not None:
+ service.run()


  if __name__ == "__main__":