huggingface-hub 0.13.4__py3-none-any.whl → 0.14.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (40) hide show
  1. huggingface_hub/__init__.py +59 -5
  2. huggingface_hub/_commit_api.py +26 -71
  3. huggingface_hub/_login.py +16 -13
  4. huggingface_hub/_multi_commits.py +305 -0
  5. huggingface_hub/_snapshot_download.py +4 -0
  6. huggingface_hub/_space_api.py +6 -0
  7. huggingface_hub/_webhooks_payload.py +124 -0
  8. huggingface_hub/_webhooks_server.py +362 -0
  9. huggingface_hub/commands/lfs.py +3 -5
  10. huggingface_hub/commands/user.py +0 -3
  11. huggingface_hub/community.py +21 -0
  12. huggingface_hub/constants.py +3 -0
  13. huggingface_hub/file_download.py +25 -10
  14. huggingface_hub/hf_api.py +666 -139
  15. huggingface_hub/hf_file_system.py +441 -0
  16. huggingface_hub/hub_mixin.py +1 -1
  17. huggingface_hub/inference_api.py +2 -4
  18. huggingface_hub/keras_mixin.py +1 -1
  19. huggingface_hub/lfs.py +196 -176
  20. huggingface_hub/repocard.py +2 -2
  21. huggingface_hub/repository.py +1 -1
  22. huggingface_hub/templates/modelcard_template.md +1 -1
  23. huggingface_hub/utils/__init__.py +8 -11
  24. huggingface_hub/utils/_errors.py +4 -4
  25. huggingface_hub/utils/_experimental.py +65 -0
  26. huggingface_hub/utils/_git_credential.py +1 -80
  27. huggingface_hub/utils/_http.py +85 -2
  28. huggingface_hub/utils/_pagination.py +4 -3
  29. huggingface_hub/utils/_paths.py +2 -0
  30. huggingface_hub/utils/_runtime.py +12 -0
  31. huggingface_hub/utils/_subprocess.py +22 -0
  32. huggingface_hub/utils/_telemetry.py +2 -4
  33. huggingface_hub/utils/tqdm.py +23 -18
  34. {huggingface_hub-0.13.4.dist-info → huggingface_hub-0.14.0rc0.dist-info}/METADATA +5 -1
  35. huggingface_hub-0.14.0rc0.dist-info/RECORD +61 -0
  36. {huggingface_hub-0.13.4.dist-info → huggingface_hub-0.14.0rc0.dist-info}/entry_points.txt +3 -0
  37. huggingface_hub-0.13.4.dist-info/RECORD +0 -56
  38. {huggingface_hub-0.13.4.dist-info → huggingface_hub-0.14.0rc0.dist-info}/LICENSE +0 -0
  39. {huggingface_hub-0.13.4.dist-info → huggingface_hub-0.14.0rc0.dist-info}/WHEEL +0 -0
  40. {huggingface_hub-0.13.4.dist-info → huggingface_hub-0.14.0rc0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,305 @@
1
+ # coding=utf-8
2
+ # Copyright 2023-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains utilities to multi-commits (i.e. push changes iteratively on a PR)."""
16
+ import re
17
+ from dataclasses import dataclass, field
18
+ from hashlib import sha256
19
+ from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple
20
+
21
+ from ._commit_api import CommitOperation, CommitOperationAdd, CommitOperationDelete
22
+ from .community import DiscussionWithDetails
23
+ from .utils import experimental
24
+ from .utils._cache_manager import _format_size
25
+
26
+
27
+ if TYPE_CHECKING:
28
+ from .hf_api import HfApi
29
+
30
+
31
class MultiCommitException(Exception):
    """Raised when something goes wrong while performing a multi-commit push."""
33
+
34
+
35
# Description of the draft PR opened for a multi-commit. The `{multi_commit_id}` line and the rendered
# `{multi_commit_strategy}` step lines are later parsed back (see `STEP_ID_REGEX` /
# `multi_commit_parse_pr_description`) to track which steps are already completed.
MULTI_COMMIT_PR_DESCRIPTION_TEMPLATE = """
## {commit_message}

{commit_description}

**Multi commit ID:** {multi_commit_id}

Scheduled commits:

{multi_commit_strategy}

_This is a PR opened using the `huggingface_hub` library in the context of a multi-commit. PR can be commented as a usual PR. However, please be aware that manually updating the PR description, changing the PR status, or pushing new commits, is not recommended as it might corrupt the commit process. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
"""

# Comment posted on the PR once every scheduled commit has been pushed.
MULTI_COMMIT_PR_COMPLETION_COMMENT_TEMPLATE = """
Multi-commit is now completed! You can ping the repo owner to review the changes. This PR can now be commented or modified without risking to corrupt it.

_This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
"""

# Comment posted before auto-merging the PR (when the caller passed `create_pr=False`).
MULTI_COMMIT_PR_CLOSING_COMMENT_TEMPLATE = """
`create_pr=False` has been passed so PR is automatically merged.

_This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
"""

# Comment posted before closing the PR when there are no changes to merge.
MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_NO_CHANGES_TEMPLATE = """
Cannot merge Pull Requests as no changes are associated. This PR will be closed automatically.

_This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
"""

# Comment posted before closing the PR when the server-side merge request failed.
MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_BAD_REQUEST_TEMPLATE = """
An error occurred while trying to merge the Pull Request: `{error_message}`.

_This is a comment posted using the `huggingface_hub` library in the context of a multi-commit. Learn more about multi-commits [in this guide](https://huggingface.co/docs/huggingface_hub/main/guides/upload)._
"""


# Matches one step line as rendered by `MultiCommitStep.__str__`: captures the completion checkbox
# state (' ' or 'x') and the 64-hex-char step id at the end of the line.
STEP_ID_REGEX = re.compile(r"- \[(?P<completed>[ |x])\].*(?P<step_id>[a-fA-F0-9]{64})", flags=re.MULTILINE)
75
+
76
+
77
@experimental
def plan_multi_commits(
    operations: Iterable[CommitOperation],
    max_operations_per_commit: int = 50,
    max_upload_size_per_commit: int = 2 * 1024 * 1024 * 1024,
) -> Tuple[List[List[CommitOperationAdd]], List[List[CommitOperationDelete]]]:
    """Group a list of operations into the commits to push.

    The splitting is intentionally simple rather than optimal (optimal grouping is a bin-packing
    problem, which is NP-hard):

    1. Deletions are batched together, at most `max_operations_per_commit` per commit.
    2. Any addition bigger than `max_upload_size_per_commit` gets a dedicated commit.
    3. Remaining additions are batched greedily, starting a new batch whenever either the
       `max_operations_per_commit` count or the `max_upload_size_per_commit` size would be exceeded.

    Args:
        operations (`List` of [`~hf_api.CommitOperation`]):
            The list of operations to split into commits.
        max_operations_per_commit (`int`):
            Maximum number of operations in a single commit. Defaults to 50.
        max_upload_size_per_commit (`int`):
            Maximum size to upload (in bytes) in a single commit. Defaults to 2GB. Files bigger than this
            limit are uploaded, 1 per commit.

    Returns:
        `Tuple[List[List[CommitOperationAdd]], List[List[CommitOperationDelete]]]`: a tuple. First item is a
        list of lists of [`CommitOperationAdd`] representing the addition commits to push. The second item is
        a list of lists of [`CommitOperationDelete`] representing the deletion commits.

    <Tip warning={true}>

    `plan_multi_commits` is experimental. Its API and behavior is subject to change in the future without prior notice.

    </Tip>

    <Tip warning={true}>

    The initial order of the operations is not guaranteed! All deletions will be performed before additions. If you are
    not updating multiple times the same file, you are fine.

    </Tip>
    """
    addition_commits: List[List[CommitOperationAdd]] = []
    deletion_commits: List[List[CommitOperationDelete]] = []

    pending_additions: List[CommitOperationAdd] = []
    pending_additions_size = 0
    pending_deletions: List[CommitOperationDelete] = []

    for operation in operations:
        if isinstance(operation, CommitOperationDelete):
            # Deletions carry no upload payload: batch them purely by count.
            pending_deletions.append(operation)
            if len(pending_deletions) >= max_operations_per_commit:
                deletion_commits.append(pending_deletions)
                pending_deletions = []
        else:
            size = operation.upload_info.size
            if size >= max_upload_size_per_commit:
                # A single file exceeding the size limit gets its own commit.
                addition_commits.append([operation])
            elif pending_additions_size + size < max_upload_size_per_commit:
                # Still room in the current batch.
                pending_additions.append(operation)
                pending_additions_size += size
            else:
                # Size limit reached: flush the current batch and start a new one with this file.
                addition_commits.append(pending_additions)
                pending_additions = [operation]
                pending_additions_size = size

        # Flush the additions batch as soon as the operation-count limit is reached.
        if len(pending_additions) >= max_operations_per_commit:
            addition_commits.append(pending_additions)
            pending_additions = []
            pending_additions_size = 0

    if len(pending_additions) > 0:
        addition_commits.append(pending_additions)
    if len(pending_deletions) > 0:
        deletion_commits.append(pending_deletions)

    return addition_commits, deletion_commits
182
+
183
+
184
@dataclass
class MultiCommitStep:
    """Dataclass containing a list of CommitOperation to commit at once.

    A [`MultiCommitStep`] is one atomic part of a [`MultiCommitStrategy`]. Each step is identified by its own
    deterministic ID based on the list of commit operations (hexadecimal sha256). ID is persistent between re-runs if
    the list of commits is kept the same.

    Raises:
        ValueError: If the list of operations is empty.
        NotImplementedError: If an operation is neither a [`CommitOperationAdd`] nor a [`CommitOperationDelete`].
    """

    operations: List[CommitOperation]

    # Deterministic sha256 hex digest of the operations (set in `__post_init__`).
    id: str = field(init=False)
    # Toggled by the caller once the step has actually been pushed.
    completed: bool = False

    def __post_init__(self) -> None:
        if len(self.operations) == 0:
            raise ValueError("A MultiCommitStep must have at least 1 commit operation, got 0.")

        # Generate commit id: hash the kind, path and content sha (or folder flag) of each operation
        # so the id changes if and only if the planned operations change.
        sha = sha256()
        for op in self.operations:
            if isinstance(op, CommitOperationAdd):
                sha.update(b"ADD")
                sha.update(op.path_in_repo.encode())
                sha.update(op.upload_info.sha256)
            elif isinstance(op, CommitOperationDelete):
                sha.update(b"DELETE")
                sha.update(op.path_in_repo.encode())
                sha.update(str(op.is_folder).encode())
            else:
                # Fix: the exception was previously instantiated but never raised, which silently
                # skipped unknown operation types when computing the step id.
                raise NotImplementedError(
                    f"Unknown commit operation type: {type(op)}. Cannot compute a deterministic step id."
                )
        self.id = sha.hexdigest()

    def __str__(self) -> str:
        """Format a step for PR description.

        Formatting can be changed in the future as long as it is single line, starts with `- [ ]`/`- [x]` and contains
        `self.id`. Must be able to match `STEP_ID_REGEX`.
        """
        additions = [op for op in self.operations if isinstance(op, CommitOperationAdd)]
        file_deletions = [op for op in self.operations if isinstance(op, CommitOperationDelete) and not op.is_folder]
        folder_deletions = [op for op in self.operations if isinstance(op, CommitOperationDelete) and op.is_folder]
        if len(additions) > 0:
            return (
                f"- [{'x' if self.completed else ' '}] Upload {len(additions)} file(s) "
                f"totalling {_format_size(sum(add.upload_info.size for add in additions))}"
                f" ({self.id})"
            )
        else:
            return (
                f"- [{'x' if self.completed else ' '}] Delete {len(file_deletions)} file(s) and"
                f" {len(folder_deletions)} folder(s) ({self.id})"
            )
237
+
238
+
239
@dataclass
class MultiCommitStrategy:
    """Dataclass containing a list of [`MultiCommitStep`] to commit iteratively.

    A strategy is identified by its own deterministic ID based on the list of its steps (hexadecimal sha256). ID is
    persistent between re-runs if the list of commits is kept the same.
    """

    addition_commits: List[MultiCommitStep]
    deletion_commits: List[MultiCommitStep]

    # Deterministic sha256 hex digest of the step ids (set in `__post_init__`).
    id: str = field(init=False)
    # Set of all step ids, used for quick membership checks.
    all_steps: Set[str] = field(init=False)

    def __post_init__(self) -> None:
        steps = self.addition_commits + self.deletion_commits
        self.all_steps = {step.id for step in steps}

        # Step ids are deterministic per operation list, so a duplicate id means a duplicate commit.
        if len(self.all_steps) < len(steps):
            raise ValueError("Got duplicate commits in MultiCommitStrategy. All commits must be unique.")
        if not self.all_steps:
            raise ValueError("A MultiCommitStrategy must have at least 1 commit, got 0.")

        # Generate strategy id by chaining the ids of all steps, in order.
        hasher = sha256()
        for step in steps:
            hasher.update(b"new step")
            hasher.update(step.id.encode())
        self.id = hasher.hexdigest()
267
+
268
+
269
def multi_commit_create_pull_request(
    api: "HfApi",
    repo_id: str,
    commit_message: str,
    commit_description: Optional[str],
    strategy: MultiCommitStrategy,
    token: Optional[str],
    repo_type: Optional[str],
) -> DiscussionWithDetails:
    """Open the draft PR that will receive the commits of a multi-commit push."""
    # The strategy id in the title and the generated description are what later runs use to
    # find and resume an in-progress multi-commit.
    pr_title = f"[WIP] {commit_message} (multi-commit {strategy.id})"
    pr_description = multi_commit_generate_comment(
        commit_message=commit_message, commit_description=commit_description, strategy=strategy
    )
    return api.create_pull_request(
        repo_id=repo_id,
        title=pr_title,
        description=pr_description,
        token=token,
        repo_type=repo_type,
    )
287
+
288
+
289
def multi_commit_generate_comment(
    commit_message: str,
    commit_description: Optional[str],
    strategy: MultiCommitStrategy,
) -> str:
    """Render the PR description for a multi-commit from its strategy.

    Deletion steps are listed before addition steps, matching the order in which they are pushed.
    """
    scheduled_commits = "\n".join(str(step) for step in strategy.deletion_commits + strategy.addition_commits)
    return MULTI_COMMIT_PR_DESCRIPTION_TEMPLATE.format(
        commit_message=commit_message,
        commit_description=commit_description or "",
        multi_commit_id=strategy.id,
        multi_commit_strategy=scheduled_commits,
    )
302
+
303
+
304
def multi_commit_parse_pr_description(description: str) -> Set[str]:
    """Extract the ids of all scheduled steps listed in a multi-commit PR description."""
    return {match.group("step_id") for match in STEP_ID_REGEX.finditer(description)}
@@ -36,6 +36,7 @@ def snapshot_download(
36
36
  proxies: Optional[Dict] = None,
37
37
  etag_timeout: float = 10,
38
38
  resume_download: bool = False,
39
+ force_download: bool = False,
39
40
  token: Optional[Union[bool, str]] = None,
40
41
  local_files_only: bool = False,
41
42
  allow_patterns: Optional[Union[List[str], str]] = None,
@@ -101,6 +102,8 @@ def snapshot_download(
101
102
  data before giving up which is passed to `requests.request`.
102
103
  resume_download (`bool`, *optional*, defaults to `False`):
103
104
  If `True`, resume a previously interrupted download.
105
+ force_download (`bool`, *optional*, defaults to `False`):
106
+ Whether the file should be downloaded even if it already exists in the local cache.
104
107
  token (`str`, `bool`, *optional*):
105
108
  A token to be used for the download.
106
109
  - If `True`, the token is read from the HuggingFace config
@@ -223,6 +226,7 @@ def snapshot_download(
223
226
  proxies=proxies,
224
227
  etag_timeout=etag_timeout,
225
228
  resume_download=resume_download,
229
+ force_download=force_download,
226
230
  token=token,
227
231
  )
228
232
 
@@ -78,6 +78,10 @@ class SpaceRuntime:
78
78
  Requested hardware. Can be different than `hardware` especially if the request
79
79
  has just been made. Example: "t4-medium". Can be `None` if no hardware has
80
80
  been requested yet.
81
+ sleep_time (`int` or `None`):
82
+ Number of seconds the Space will be kept alive after the last request. By default (if value is `None`), the
83
+ Space will never go to sleep if it's running on an upgraded hardware, while it will go to sleep after 48
84
+ hours on a free 'cpu-basic' hardware. For more details, see https://huggingface.co/docs/hub/spaces-gpus#sleep-time.
81
85
  raw (`dict`):
82
86
  Raw response from the server. Contains more information about the Space
83
87
  runtime like number of replicas, number of cpu, memory size,...
@@ -86,10 +90,12 @@ class SpaceRuntime:
86
90
  stage: SpaceStage
87
91
  hardware: Optional[SpaceHardware]
88
92
  requested_hardware: Optional[SpaceHardware]
93
+ sleep_time: Optional[int]
89
94
  raw: Dict
90
95
 
91
96
    def __init__(self, data: Dict) -> None:
        # `data` is the raw server response describing the Space runtime; kept as-is in `self.raw`.
        self.stage = data["stage"]
        self.hardware = data["hardware"]["current"]
        self.requested_hardware = data["hardware"]["requested"]
        # Server field "gcTimeout" maps to `sleep_time`: seconds the Space is kept alive after the
        # last request (`None` = never sleeps on upgraded hardware).
        # NOTE(review): raises KeyError if the server omits "gcTimeout" — confirm the field is
        # always present in the API response.
        self.sleep_time = data["gcTimeout"]
        self.raw = data
@@ -0,0 +1,124 @@
1
+ # coding=utf-8
2
+ # Copyright 2023-present, the HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Contains data structures to parse the webhooks payload."""
16
+ from typing import List, Optional
17
+
18
+ from .utils import is_gradio_available
19
+ from .utils._typing import Literal
20
+
21
+
22
# Fail early with an actionable message when gradio is missing. `pydantic` is imported just below;
# presumably it is expected to come installed alongside `gradio` — confirm.
if not is_gradio_available():
    raise ImportError(
        "You must have `gradio` installed to use `WebhooksServer`. Please run `pip install --upgrade gradio` first."
    )

from pydantic import BaseModel
28
+
29
+
30
+ # This is an adaptation of the ReportV3 interface implemented in moon-landing. V0, V1 and V2 have been ignored as they
31
+ # are not in use anymore. To keep in sync when format is updated in
32
+ # https://github.com/huggingface/moon-landing/blob/main/server/lib/HFWebhooks.ts (internal link).
33
+
34
+
35
# Type aliases mirroring the string enums of the Hub webhook payload (ReportV3 format, see comment above).

# Action that triggered the webhook event.
WebhookEvent_T = Literal[
    "create",
    "delete",
    "move",
    "update",
]
# Kind of change applied to a repo.
RepoChangeEvent_T = Literal[
    "add",
    "move",
    "remove",
    "update",
]
# Repository kinds on the Hub.
RepoType_T = Literal[
    "dataset",
    "model",
    "space",
]
# Status of a discussion or pull request.
DiscussionStatus_T = Literal[
    "closed",
    "draft",
    "open",
    "merged",
]
# Only payload version 3 is supported by this module (V0/V1/V2 are not used anymore).
SupportedWebhookVersion = Literal[3]
59
+
60
+
61
class ObjectId(BaseModel):
    """Base payload object carrying an `id` string."""

    id: str
63
+
64
+
65
class WebhookPayloadUrl(BaseModel):
    """URLs pointing to the related resource."""

    web: str  # URL of the resource on the website
    api: Optional[str]  # API URL, when one exists for the resource
68
+
69
+
70
class WebhookPayloadMovedTo(BaseModel):
    """Destination of a repo after a "move" event (new name and owner)."""

    name: str
    owner: ObjectId
73
+
74
+
75
class WebhookPayloadWebhook(ObjectId):
    """The webhook that fired. Only payload version 3 is supported."""

    version: SupportedWebhookVersion
77
+
78
+
79
class WebhookPayloadEvent(BaseModel):
    """The event that triggered the webhook: an action and the scope it applies to."""

    action: WebhookEvent_T
    scope: str
82
+
83
+
84
class WebhookPayloadDiscussionChanges(BaseModel):
    """Changes attached to a discussion/PR update."""

    base: str
    mergeCommitId: Optional[str]  # commit id of the merge, when available
87
+
88
+
89
class WebhookPayloadComment(ObjectId):
    """A comment posted in a discussion or pull request."""

    author: ObjectId
    hidden: bool
    content: Optional[str]
    url: WebhookPayloadUrl
94
+
95
+
96
class WebhookPayloadDiscussion(ObjectId):
    """A discussion or pull request on the Hub (`isPullRequest` discriminates the two)."""

    num: int
    author: ObjectId
    url: WebhookPayloadUrl
    title: str
    isPullRequest: bool
    status: DiscussionStatus_T
    changes: Optional[WebhookPayloadDiscussionChanges]
    pinned: Optional[bool]
105
+
106
+
107
class WebhookPayloadRepo(ObjectId):
    """The repository concerned by the event."""

    owner: ObjectId
    head_sha: Optional[str]
    name: str
    private: bool
    subdomain: Optional[str]
    tags: Optional[List[str]]
    # NOTE(review): duplicates the values of `RepoType_T` — the alias could be reused here.
    type: Literal["dataset", "model", "space"]
    url: WebhookPayloadUrl
116
+
117
+
118
class WebhookPayload(BaseModel):
    """Root object received on a webhook endpoint; optional parts depend on the event kind."""

    event: WebhookPayloadEvent
    repo: WebhookPayloadRepo
    discussion: Optional[WebhookPayloadDiscussion]
    comment: Optional[WebhookPayloadComment]
    webhook: WebhookPayloadWebhook
    movedTo: Optional[WebhookPayloadMovedTo]