huggingface-hub 0.35.0rc0__py3-none-any.whl → 0.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This release has been flagged as potentially problematic.
- huggingface_hub/__init__.py +19 -1
- huggingface_hub/_jobs_api.py +168 -12
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_oauth.py +5 -9
- huggingface_hub/_tensorboard_logger.py +9 -10
- huggingface_hub/_upload_large_folder.py +108 -1
- huggingface_hub/cli/auth.py +4 -1
- huggingface_hub/cli/cache.py +7 -9
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +591 -13
- huggingface_hub/cli/repo.py +10 -4
- huggingface_hub/commands/delete_cache.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/dataclasses.py +3 -0
- huggingface_hub/file_download.py +12 -10
- huggingface_hub/hf_api.py +549 -95
- huggingface_hub/hf_file_system.py +4 -10
- huggingface_hub/hub_mixin.py +5 -3
- huggingface_hub/inference/_client.py +98 -181
- huggingface_hub/inference/_common.py +72 -70
- huggingface_hub/inference/_generated/_async_client.py +116 -201
- huggingface_hub/inference/_generated/types/chat_completion.py +2 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +1 -1
- huggingface_hub/inference/_mcp/mcp_client.py +28 -11
- huggingface_hub/inference/_mcp/types.py +3 -0
- huggingface_hub/inference/_mcp/utils.py +7 -3
- huggingface_hub/inference/_providers/__init__.py +13 -0
- huggingface_hub/inference/_providers/_common.py +29 -4
- huggingface_hub/inference/_providers/black_forest_labs.py +1 -1
- huggingface_hub/inference/_providers/fal_ai.py +33 -2
- huggingface_hub/inference/_providers/hf_inference.py +15 -7
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +1 -1
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/lfs.py +2 -4
- huggingface_hub/repocard.py +2 -1
- huggingface_hub/utils/_dotenv.py +24 -20
- huggingface_hub/utils/_git_credential.py +1 -1
- huggingface_hub/utils/_http.py +3 -5
- huggingface_hub/utils/_runtime.py +1 -0
- huggingface_hub/utils/_typing.py +24 -4
- huggingface_hub/utils/_xet_progress_reporting.py +31 -10
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/METADATA +7 -4
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/RECORD +50 -48
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/top_level.txt +0 -0
huggingface_hub/__init__.py CHANGED

@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING


-__version__ = "0.35.0rc0"
+__version__ = "0.35.1"

 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
@@ -182,6 +182,8 @@ _SUBMOD_ATTRS = {
         "create_inference_endpoint_from_catalog",
         "create_pull_request",
         "create_repo",
+        "create_scheduled_job",
+        "create_scheduled_uv_job",
         "create_tag",
         "create_webhook",
         "dataset_info",
@@ -192,6 +194,7 @@ _SUBMOD_ATTRS = {
         "delete_folder",
         "delete_inference_endpoint",
         "delete_repo",
+        "delete_scheduled_job",
         "delete_space_secret",
         "delete_space_storage",
         "delete_space_variable",
@@ -219,6 +222,7 @@ _SUBMOD_ATTRS = {
         "get_webhook",
         "grant_access",
         "inspect_job",
+        "inspect_scheduled_job",
         "list_accepted_access_requests",
         "list_collections",
         "list_datasets",
@@ -259,6 +263,7 @@ _SUBMOD_ATTRS = {
         "request_space_storage",
         "restart_space",
         "resume_inference_endpoint",
+        "resume_scheduled_job",
         "revision_exists",
         "run_as_future",
         "run_job",
@@ -267,6 +272,7 @@ _SUBMOD_ATTRS = {
         "set_space_sleep_time",
         "space_info",
         "super_squash_history",
+        "suspend_scheduled_job",
         "unlike",
         "update_collection_item",
         "update_collection_metadata",
@@ -828,6 +834,8 @@ __all__ = [
     "create_inference_endpoint_from_catalog",
     "create_pull_request",
     "create_repo",
+    "create_scheduled_job",
+    "create_scheduled_uv_job",
     "create_tag",
     "create_webhook",
     "dataset_info",
@@ -838,6 +846,7 @@ __all__ = [
     "delete_folder",
     "delete_inference_endpoint",
     "delete_repo",
+    "delete_scheduled_job",
     "delete_space_secret",
     "delete_space_storage",
     "delete_space_variable",
@@ -878,6 +887,7 @@ __all__ = [
     "hf_hub_download",
     "hf_hub_url",
     "inspect_job",
+    "inspect_scheduled_job",
     "interpreter_login",
     "list_accepted_access_requests",
     "list_collections",
@@ -933,6 +943,7 @@ __all__ = [
     "request_space_storage",
     "restart_space",
     "resume_inference_endpoint",
+    "resume_scheduled_job",
     "revision_exists",
     "run_as_future",
     "run_job",
@@ -949,6 +960,7 @@ __all__ = [
     "split_tf_state_dict_into_shards",
     "split_torch_state_dict_into_shards",
     "super_squash_history",
+    "suspend_scheduled_job",
     "try_to_load_from_cache",
     "unlike",
     "update_collection_item",
@@ -1190,6 +1202,8 @@ if TYPE_CHECKING: # pragma: no cover
         create_inference_endpoint_from_catalog, # noqa: F401
         create_pull_request, # noqa: F401
         create_repo, # noqa: F401
+        create_scheduled_job, # noqa: F401
+        create_scheduled_uv_job, # noqa: F401
         create_tag, # noqa: F401
         create_webhook, # noqa: F401
         dataset_info, # noqa: F401
@@ -1200,6 +1214,7 @@ if TYPE_CHECKING: # pragma: no cover
         delete_folder, # noqa: F401
         delete_inference_endpoint, # noqa: F401
         delete_repo, # noqa: F401
+        delete_scheduled_job, # noqa: F401
         delete_space_secret, # noqa: F401
         delete_space_storage, # noqa: F401
         delete_space_variable, # noqa: F401
@@ -1227,6 +1242,7 @@ if TYPE_CHECKING: # pragma: no cover
         get_webhook, # noqa: F401
         grant_access, # noqa: F401
         inspect_job, # noqa: F401
+        inspect_scheduled_job, # noqa: F401
         list_accepted_access_requests, # noqa: F401
         list_collections, # noqa: F401
         list_datasets, # noqa: F401
@@ -1267,6 +1283,7 @@ if TYPE_CHECKING: # pragma: no cover
         request_space_storage, # noqa: F401
         restart_space, # noqa: F401
         resume_inference_endpoint, # noqa: F401
+        resume_scheduled_job, # noqa: F401
         revision_exists, # noqa: F401
         run_as_future, # noqa: F401
         run_job, # noqa: F401
@@ -1275,6 +1292,7 @@ if TYPE_CHECKING: # pragma: no cover
         set_space_sleep_time, # noqa: F401
         space_info, # noqa: F401
         super_squash_history, # noqa: F401
+        suspend_scheduled_job, # noqa: F401
         unlike, # noqa: F401
         update_collection_item, # noqa: F401
         update_collection_metadata, # noqa: F401
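The new exports above back the scheduled Jobs feature. A minimal, hypothetical usage sketch follows: the function names and the create_scheduled_job arguments come from this diff, while passing the job id to the inspect/delete helpers is an assumption.

# Hypothetical sketch: exercising the newly exported scheduled-job helpers.
# create_scheduled_job arguments mirror the docstring example in _jobs_api.py below;
# id-based calls to inspect_scheduled_job / delete_scheduled_job are assumptions.
from huggingface_hub import create_scheduled_job, delete_scheduled_job, inspect_scheduled_job

scheduled = create_scheduled_job(
    image="python:3.12",
    command=["python", "-c", "print('Hello from the cloud!')"],
    schedule="@hourly",
)
print(inspect_scheduled_job(scheduled.id).status.next_job_run_at)
delete_scheduled_job(scheduled.id)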
huggingface_hub/_jobs_api.py CHANGED

@@ -15,7 +15,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 from huggingface_hub import constants
 from huggingface_hub._space_api import SpaceHardware
@@ -47,15 +47,12 @@ class JobStatus:
     stage: JobStage
     message: Optional[str]

-    def __init__(self, **kwargs) -> None:
-        self.stage = kwargs["stage"]
-        self.message = kwargs.get("message")
-

 @dataclass
 class JobOwner:
     id: str
     name: str
+    type: str


 @dataclass
@@ -88,8 +85,8 @@ class JobInfo:
         status: (`JobStatus` or `None`):
             Status of the Job, e.g. `JobStatus(stage="RUNNING", message=None)`
             See [`JobStage`] for possible stage values.
-
-            Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq")`
+        owner: (`JobOwner` or `None`):
+            Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")`

     Example:

@@ -100,7 +97,7 @@ class JobInfo:
     ... command=["python", "-c", "print('Hello from the cloud!')"]
     ... )
     >>> job
-    JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
+    JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq', type='user'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
     >>> job.id
     '687fb701029421ae5549d998'
     >>> job.url
@@ -119,8 +116,8 @@ class JobInfo:
     environment: Optional[Dict[str, Any]]
     secrets: Optional[Dict[str, Any]]
     flavor: Optional[SpaceHardware]
-    status:
-    owner:
+    status: JobStatus
+    owner: JobOwner

     # Inferred fields
     endpoint: str
@@ -132,14 +129,173 @@ class JobInfo:
         self.created_at = parse_datetime(created_at) if created_at else None
         self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
         self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
-
+        owner = kwargs.get("owner", {})
+        self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"])
         self.command = kwargs.get("command")
         self.arguments = kwargs.get("arguments")
         self.environment = kwargs.get("environment")
         self.secrets = kwargs.get("secrets")
         self.flavor = kwargs.get("flavor")
-
+        status = kwargs.get("status", {})
+        self.status = JobStatus(stage=status["stage"], message=status.get("message"))

         # Inferred fields
         self.endpoint = kwargs.get("endpoint", constants.ENDPOINT)
         self.url = f"{self.endpoint}/jobs/{self.owner.name}/{self.id}"
+
+
+@dataclass
+class JobSpec:
+    docker_image: Optional[str]
+    space_id: Optional[str]
+    command: Optional[List[str]]
+    arguments: Optional[List[str]]
+    environment: Optional[Dict[str, Any]]
+    secrets: Optional[Dict[str, Any]]
+    flavor: Optional[SpaceHardware]
+    timeout: Optional[int]
+    tags: Optional[List[str]]
+    arch: Optional[str]
+
+    def __init__(self, **kwargs) -> None:
+        self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
+        self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
+        self.command = kwargs.get("command")
+        self.arguments = kwargs.get("arguments")
+        self.environment = kwargs.get("environment")
+        self.secrets = kwargs.get("secrets")
+        self.flavor = kwargs.get("flavor")
+        self.timeout = kwargs.get("timeout")
+        self.tags = kwargs.get("tags")
+        self.arch = kwargs.get("arch")
+
+
+@dataclass
+class LastJobInfo:
+    id: str
+    at: datetime
+
+    def __init__(self, **kwargs) -> None:
+        self.id = kwargs["id"]
+        self.at = parse_datetime(kwargs["at"])
+
+
+@dataclass
+class ScheduledJobStatus:
+    last_job: Optional[LastJobInfo]
+    next_job_run_at: Optional[datetime]
+
+    def __init__(self, **kwargs) -> None:
+        last_job = kwargs.get("lastJob") or kwargs.get("last_job")
+        self.last_job = LastJobInfo(**last_job) if last_job else None
+        next_job_run_at = kwargs.get("nextJobRunAt") or kwargs.get("next_job_run_at")
+        self.next_job_run_at = parse_datetime(str(next_job_run_at)) if next_job_run_at else None
+
+
+@dataclass
+class ScheduledJobInfo:
+    """
+    Contains information about a Job.
+
+    Args:
+        id (`str`):
+            Scheduled Job ID.
+        created_at (`datetime` or `None`):
+            When the scheduled Job was created.
+        tags (`List[str]` or `None`):
+            The tags of the scheduled Job.
+        schedule (`str` or `None`):
+            One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
+            CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).
+        suspend (`bool` or `None`):
+            Whether the scheduled job is suspended (paused).
+        concurrency (`bool` or `None`):
+            Whether multiple instances of this Job can run concurrently.
+        status (`ScheduledJobStatus` or `None`):
+            Status of the scheduled Job.
+        owner: (`JobOwner` or `None`):
+            Owner of the scheduled Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")`
+        job_spec: (`JobSpec` or `None`):
+            Specifications of the Job.
+
+    Example:
+
+    ```python
+    >>> from huggingface_hub import run_job
+    >>> scheduled_job = create_scheduled_job(
+    ...     image="python:3.12",
+    ...     command=["python", "-c", "print('Hello from the cloud!')"],
+    ...     schedule="@hourly",
+    ... )
+    >>> scheduled_job.id
+    '687fb701029421ae5549d999'
+    >>> scheduled_job.status.next_job_run_at
+    datetime.datetime(2025, 7, 22, 17, 6, 25, 79000, tzinfo=datetime.timezone.utc)
+    ```
+    """
+
+    id: str
+    created_at: Optional[datetime]
+    job_spec: JobSpec
+    schedule: Optional[str]
+    suspend: Optional[bool]
+    concurrency: Optional[bool]
+    status: ScheduledJobStatus
+    owner: JobOwner
+
+    def __init__(self, **kwargs) -> None:
+        self.id = kwargs["id"]
+        created_at = kwargs.get("createdAt") or kwargs.get("created_at")
+        self.created_at = parse_datetime(created_at) if created_at else None
+        self.job_spec = JobSpec(**(kwargs.get("job_spec") or kwargs.get("jobSpec", {})))
+        self.schedule = kwargs.get("schedule")
+        self.suspend = kwargs.get("suspend")
+        self.concurrency = kwargs.get("concurrency")
+        status = kwargs.get("status", {})
+        self.status = ScheduledJobStatus(
+            last_job=status.get("last_job") or status.get("lastJob"),
+            next_job_run_at=status.get("next_job_run_at") or status.get("nextJobRunAt"),
+        )
+        owner = kwargs.get("owner", {})
+        self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"])
+
+
+def _create_job_spec(
+    *,
+    image: str,
+    command: List[str],
+    env: Optional[Dict[str, Any]],
+    secrets: Optional[Dict[str, Any]],
+    flavor: Optional[SpaceHardware],
+    timeout: Optional[Union[int, float, str]],
+) -> Dict[str, Any]:
+    # prepare job spec to send to HF Jobs API
+    job_spec: Dict[str, Any] = {
+        "command": command,
+        "arguments": [],
+        "environment": env or {},
+        "flavor": flavor or SpaceHardware.CPU_BASIC,
+    }
+    # secrets are optional
+    if secrets:
+        job_spec["secrets"] = secrets
+    # timeout is optional
+    if timeout:
+        time_units_factors = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24}
+        if isinstance(timeout, str) and timeout[-1] in time_units_factors:
+            job_spec["timeoutSeconds"] = int(float(timeout[:-1]) * time_units_factors[timeout[-1]])
+        else:
+            job_spec["timeoutSeconds"] = int(timeout)
+    # input is either from docker hub or from HF spaces
+    for prefix in (
+        "https://huggingface.co/spaces/",
+        "https://hf.co/spaces/",
+        "huggingface.co/spaces/",
+        "hf.co/spaces/",
+    ):
+        if image.startswith(prefix):
+            job_spec["spaceId"] = image[len(prefix) :]
+            break
+    else:
+        job_spec["dockerImage"] = image
+    return job_spec
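To make the new _create_job_spec helper concrete, here is a small sketch of the mapping it performs, based only on the code above; the Space URL and environment values are made up.

# Sketch of the payload produced by _create_job_spec (argument values are illustrative).
from huggingface_hub._jobs_api import _create_job_spec

spec = _create_job_spec(
    image="https://huggingface.co/spaces/my-user/my-space",  # hypothetical Space URL
    command=["python", "run.py"],
    env={"N_ROWS": "1000"},
    secrets=None,
    flavor=None,    # falls back to SpaceHardware.CPU_BASIC ("cpu-basic")
    timeout="30m",  # "30m" is converted to timeoutSeconds=1800
)
# Expected shape: {"command": [...], "arguments": [], "environment": {...},
#                  "flavor": "cpu-basic", "timeoutSeconds": 1800, "spaceId": "my-user/my-space"}
# A plain image name (e.g. "python:3.12") would set "dockerImage" instead of "spaceId".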
huggingface_hub/_local_folder.py CHANGED

@@ -90,7 +90,7 @@ class LocalDownloadFilePaths:
         resolved_path = str(path.resolve())
         # Some Windows versions do not allow for paths longer than 255 characters.
         # In this case, we must specify it as an extended path by using the "\\?\" prefix.
-        if len(resolved_path) > 255 and not resolved_path.startswith("\\\\?\\"):
+        if os.name == "nt" and len(resolved_path) > 255 and not resolved_path.startswith("\\\\?\\"):
             path = Path("\\\\?\\" + resolved_path)
         return path

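The fix gates the extended-path prefix on Windows only, so long paths on other platforms are no longer rewritten; a standalone sketch of the same guard:

# Standalone illustration of the guarded long-path logic (not library code).
import os
from pathlib import Path

def _with_extended_prefix(path: Path) -> Path:
    resolved = str(path.resolve())
    # Only Windows needs the "\\?\" extended-path prefix for paths longer than 255 characters.
    if os.name == "nt" and len(resolved) > 255 and not resolved.startswith("\\\\?\\"):
        return Path("\\\\?\\" + resolved)
    return path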
huggingface_hub/_oauth.py CHANGED

@@ -6,7 +6,7 @@ import time
 import urllib.parse
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union

 from . import constants
 from .hf_api import whoami
@@ -39,10 +39,8 @@ class OAuthOrgInfo:
             Whether the org has a payment method set up. Hugging Face field.
         role_in_org (`Optional[str]`, *optional*):
             The user's role in the org. Hugging Face field.
-
-
-        missing_mfa (`Optional[bool]`, *optional*):
-            Indicates if the user granted the OAuth app access to the org but didn't complete MFA. Hugging Face field.
+        security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
+            Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
     """

     sub: str
@@ -52,8 +50,7 @@ class OAuthOrgInfo:
     is_enterprise: bool
     can_pay: Optional[bool] = None
     role_in_org: Optional[str] = None
-
-    missing_mfa: Optional[bool] = None
+    security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None


 @dataclass
@@ -221,8 +218,7 @@ def parse_huggingface_oauth(request: "fastapi.Request") -> Optional[OAuthInfo]:
             is_enterprise=org.get("isEnterprise"),
             can_pay=org.get("canPay"),
             role_in_org=org.get("roleInOrg"),
-
-            missing_mfa=org.get("missingMFA"),
+            security_restrictions=org.get("securityRestrictions"),
         )
         for org in orgs_data
     ]
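Code that previously read missing_mfa needs to switch to the new security_restrictions list; a hypothetical migration sketch:

# Hypothetical migration for downstream code; `org` is assumed to be an OAuthOrgInfo
# obtained from parse_huggingface_oauth().
def org_needs_mfa(org) -> bool:
    restrictions = org.security_restrictions or []
    return "mfa" in restrictions  # previously: bool(org.missing_mfa)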
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"""Contains a logger to push training logs to the Hub, using Tensorboard."""
|
|
15
15
|
|
|
16
16
|
from pathlib import Path
|
|
17
|
-
from typing import
|
|
17
|
+
from typing import List, Optional, Union
|
|
18
18
|
|
|
19
19
|
from ._commit_scheduler import CommitScheduler
|
|
20
20
|
from .errors import EntryNotFoundError
|
|
@@ -26,25 +26,24 @@ from .utils import experimental
|
|
|
26
26
|
# or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
|
|
27
27
|
# from either of them.
|
|
28
28
|
try:
|
|
29
|
-
from tensorboardX import SummaryWriter
|
|
29
|
+
from tensorboardX import SummaryWriter as _RuntimeSummaryWriter
|
|
30
30
|
|
|
31
31
|
is_summary_writer_available = True
|
|
32
|
-
|
|
33
32
|
except ImportError:
|
|
34
33
|
try:
|
|
35
|
-
from torch.utils.tensorboard import SummaryWriter
|
|
34
|
+
from torch.utils.tensorboard import SummaryWriter as _RuntimeSummaryWriter
|
|
36
35
|
|
|
37
|
-
is_summary_writer_available =
|
|
36
|
+
is_summary_writer_available = True
|
|
38
37
|
except ImportError:
|
|
39
38
|
# Dummy class to avoid failing at import. Will raise on instance creation.
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
class _DummySummaryWriter:
|
|
40
|
+
pass
|
|
42
41
|
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
_RuntimeSummaryWriter = _DummySummaryWriter # type: ignore[assignment]
|
|
43
|
+
is_summary_writer_available = False
|
|
45
44
|
|
|
46
45
|
|
|
47
|
-
class HFSummaryWriter(
|
|
46
|
+
class HFSummaryWriter(_RuntimeSummaryWriter):
|
|
48
47
|
"""
|
|
49
48
|
Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.
|
|
50
49
|
|
|
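The refactor only renames the runtime base class, so user-facing usage should be unchanged; a minimal sketch, assuming a tensorboard backend is installed and using an illustrative repo_id:

# Minimal usage sketch (repo_id and values are illustrative; requires tensorboardX or torch).
from huggingface_hub import HFSummaryWriter

writer = HFSummaryWriter(repo_id="my-user/tb-logs", commit_every=5)  # periodically pushes logs to the Hub
writer.add_scalar("train/loss", 0.42, global_step=1)  # standard SummaryWriter API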
huggingface_hub/_upload_large_folder.py CHANGED

@@ -24,7 +24,7 @@ import traceback
 from datetime import datetime
 from pathlib import Path
 from threading import Lock
-from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 from urllib.parse import quote

 from . import constants
@@ -49,6 +49,108 @@ COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
 UPLOAD_BATCH_SIZE_XET = 256 # Max 256 files per upload batch for XET-enabled repos
 UPLOAD_BATCH_SIZE_LFS = 1 # Otherwise, batches of 1 for regular LFS upload

+# Repository limits (from https://huggingface.co/docs/hub/repositories-recommendations)
+MAX_FILES_PER_REPO = 100_000 # Recommended maximum number of files per repository
+MAX_FILES_PER_FOLDER = 10_000 # Recommended maximum number of files per folder
+MAX_FILE_SIZE_GB = 50 # Hard limit for individual file size
+RECOMMENDED_FILE_SIZE_GB = 20 # Recommended maximum for individual file size
+
+
+def _validate_upload_limits(paths_list: List[LocalUploadFilePaths]) -> None:
+    """
+    Validate upload against repository limits and warn about potential issues.
+
+    Args:
+        paths_list: List of file paths to be uploaded
+
+    Warns about:
+        - Too many files in the repository (>100k)
+        - Too many entries (files or subdirectories) in a single folder (>10k)
+        - Files exceeding size limits (>20GB recommended, >50GB hard limit)
+    """
+    logger.info("Running validation checks on files to upload...")
+
+    # Check 1: Total file count
+    if len(paths_list) > MAX_FILES_PER_REPO:
+        logger.warning(
+            f"You are about to upload {len(paths_list):,} files. "
+            f"This exceeds the recommended limit of {MAX_FILES_PER_REPO:,} files per repository.\n"
+            f"Consider:\n"
+            f" - Splitting your data into multiple repositories\n"
+            f" - Using fewer, larger files (e.g., parquet files)\n"
+            f" - See: https://huggingface.co/docs/hub/repositories-recommendations"
+        )
+
+    # Check 2: Files and subdirectories per folder
+    # Track immediate children (files and subdirs) for each folder
+    from collections import defaultdict
+
+    entries_per_folder: Dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()})
+
+    for paths in paths_list:
+        path = Path(paths.path_in_repo)
+        parts = path.parts
+
+        # Count this file in its immediate parent directory
+        parent = str(path.parent) if str(path.parent) != "." else "."
+        entries_per_folder[parent]["files"] += 1
+
+        # Track immediate subdirectories for each parent folder
+        # Walk through the path components to track parent-child relationships
+        for i, child in enumerate(parts[:-1]):
+            parent = "." if i == 0 else "/".join(parts[:i])
+            entries_per_folder[parent]["subdirs"].add(child)
+
+    # Check limits for each folder
+    for folder, data in entries_per_folder.items():
+        file_count = data["files"]
+        subdir_count = len(data["subdirs"])
+        total_entries = file_count + subdir_count
+
+        if total_entries > MAX_FILES_PER_FOLDER:
+            folder_display = "root" if folder == "." else folder
+            logger.warning(
+                f"Folder '{folder_display}' contains {total_entries:,} entries "
+                f"({file_count:,} files and {subdir_count:,} subdirectories). "
+                f"This exceeds the recommended {MAX_FILES_PER_FOLDER:,} entries per folder.\n"
+                "Consider reorganising into sub-folders."
+            )
+
+    # Check 3: File sizes
+    large_files = []
+    very_large_files = []
+
+    for paths in paths_list:
+        size = paths.file_path.stat().st_size
+        size_gb = size / 1_000_000_000 # Use decimal GB as per Hub limits
+
+        if size_gb > MAX_FILE_SIZE_GB:
+            very_large_files.append((paths.path_in_repo, size_gb))
+        elif size_gb > RECOMMENDED_FILE_SIZE_GB:
+            large_files.append((paths.path_in_repo, size_gb))
+
+    # Warn about very large files (>50GB)
+    if very_large_files:
+        files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in very_large_files[:5])
+        more_str = f"\n ... and {len(very_large_files) - 5} more files" if len(very_large_files) > 5 else ""
+        logger.warning(
+            f"Found {len(very_large_files)} files exceeding the {MAX_FILE_SIZE_GB}GB hard limit:\n"
+            f" - {files_str}{more_str}\n"
+            f"These files may fail to upload. Consider splitting them into smaller chunks."
+        )
+
+    # Warn about large files (>20GB)
+    if large_files:
+        files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in large_files[:5])
+        more_str = f"\n ... and {len(large_files) - 5} more files" if len(large_files) > 5 else ""
+        logger.warning(
+            f"Found {len(large_files)} files larger than {RECOMMENDED_FILE_SIZE_GB}GB (recommended limit):\n"
+            f" - {files_str}{more_str}\n"
+            f"Large files may slow down loading and processing."
+        )
+
+    logger.info("Validation checks complete.")
+

 def upload_large_folder_internal(
     api: "HfApi",
@@ -118,6 +220,11 @@ def upload_large_folder_internal(
     paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
     logger.info(f"Found {len(paths_list)} candidate files to upload")

+    # Validate upload against repository limits
+    _validate_upload_limits(paths_list)
+
+    logger.info("Starting upload...")
+
     # Read metadata for each file
     items = [
         (paths, read_upload_metadata(folder_path, paths.path_in_repo))
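The per-folder counting in Check 2 tracks immediate files and immediate subdirectories separately; a standalone sketch with made-up paths shows what it accumulates:

# Standalone sketch of the per-folder counting logic above, with made-up paths.
from collections import defaultdict
from pathlib import Path

paths_in_repo = ["data/train/0.parquet", "data/train/1.parquet", "data/val/0.parquet", "README.md"]
entries = defaultdict(lambda: {"files": 0, "subdirs": set()})
for p in paths_in_repo:
    path = Path(p)
    parent = str(path.parent) if str(path.parent) != "." else "."
    entries[parent]["files"] += 1            # each file is counted in its immediate parent
    parts = path.parts
    for i, child in enumerate(parts[:-1]):   # each ancestor records its immediate subdirectory
        parent = "." if i == 0 else "/".join(parts[:i])
        entries[parent]["subdirs"].add(child)
# entries["data/train"]["files"] == 2; entries["data"]["subdirs"] == {"train", "val"};
# the root "." holds README.md plus the "data" subdirectory.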
huggingface_hub/cli/auth.py CHANGED

@@ -62,6 +62,9 @@ class AuthCommands(BaseHuggingfaceCLICommand):
         auth_parser = parser.add_parser("auth", help="Manage authentication (login, logout, etc.).")
         auth_subparsers = auth_parser.add_subparsers(help="Authentication subcommands")

+        # Show help if no subcommand is provided
+        auth_parser.set_defaults(func=lambda args: auth_parser.print_help())
+
         # Add 'login' as a subcommand of 'auth'
         login_parser = auth_subparsers.add_parser(
             "login", help="Log in using a token from huggingface.co/settings/tokens"
@@ -197,7 +200,7 @@ class AuthWhoami(BaseAuthCommand):
             exit()
         try:
             info = self._api.whoami(token)
-            print(info["name"])
+            print(ANSI.bold("user: "), info["name"])
             orgs = [org["name"] for org in info["orgs"]]
             if orgs:
                 print(ANSI.bold("orgs: "), ",".join(orgs))
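The set_defaults(func=...) trick makes a bare "hf auth" invocation print the auth-specific help; a self-contained argparse sketch of the pattern (command names are illustrative, not library code):

# Self-contained illustration of the "print help when no subcommand" pattern.
import argparse

parser = argparse.ArgumentParser(prog="hf")
subparsers = parser.add_subparsers()

auth = subparsers.add_parser("auth")
auth_sub = auth.add_subparsers()
auth.set_defaults(func=lambda args: auth.print_help())  # fallback when only "hf auth" is given
auth_sub.add_parser("whoami").set_defaults(func=lambda args: print("whoami called"))

args = parser.parse_args(["auth"])
args.func(args)  # prints the auth help instead of raising AttributeError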
huggingface_hub/cli/cache.py CHANGED

@@ -21,13 +21,7 @@ from functools import wraps
 from tempfile import mkstemp
 from typing import Any, Callable, Iterable, List, Literal, Optional, Union

-from ..utils import (
-    CachedRepoInfo,
-    CachedRevisionInfo,
-    CacheNotFound,
-    HFCacheInfo,
-    scan_cache_dir,
-)
+from ..utils import CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir
 from . import BaseHuggingfaceCLICommand
 from ._cli_utils import ANSI, tabulate

@@ -52,7 +46,7 @@ def require_inquirer_py(fn: Callable) -> Callable:
         if not _inquirer_py_available:
             raise ImportError(
                 "The 'cache delete' command requires extra dependencies for the TUI.\n"
-                "Please run 'pip install huggingface_hub[cli]' to install them.\n"
+                "Please run 'pip install \"huggingface_hub[cli]\"' to install them.\n"
                 "Otherwise, disable TUI using the '--disable-tui' flag."
             )
         return fn(*args, **kwargs)
@@ -65,6 +59,10 @@ class CacheCommand(BaseHuggingfaceCLICommand):
     def register_subcommand(parser: _SubParsersAction):
         cache_parser = parser.add_parser("cache", help="Manage local cache directory.")
         cache_subparsers = cache_parser.add_subparsers(dest="cache_command", help="Cache subcommands")
+
+        # Show help if no subcommand is provided
+        cache_parser.set_defaults(func=lambda args: cache_parser.print_help())
+
         # Scan subcommand
         scan_parser = cache_subparsers.add_parser("scan", help="Scan cache directory.")
         scan_parser.add_argument(
@@ -145,7 +143,7 @@ class CacheCommand(BaseHuggingfaceCLICommand):
         if self.verbosity >= 3:
             print(ANSI.gray(message))
             for warning in hf_cache_info.warnings:
-                print(ANSI.gray(warning))
+                print(ANSI.gray(str(warning)))
         else:
             print(ANSI.gray(message + " Use -vvv to print details."))
huggingface_hub/cli/hf.py CHANGED

@@ -47,10 +47,6 @@ def main():
     # LFS commands (hidden in --help)
     LfsCommands.register_subcommand(commands_parser)

-    # Legacy commands
-
-    # Experimental
-
     # Let's go
     args = parser.parse_args()
     if not hasattr(args, "func"):
@@ -59,7 +55,8 @@ def main():

     # Run
     service = args.func(args)
-    service
+    if service is not None:
+        service.run()


 if __name__ == "__main__":