huggingface-hub 0.12.1__py3-none-any.whl → 0.13.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. huggingface_hub/__init__.py +165 -127
  2. huggingface_hub/_commit_api.py +25 -51
  3. huggingface_hub/_login.py +4 -13
  4. huggingface_hub/_snapshot_download.py +45 -23
  5. huggingface_hub/_space_api.py +7 -0
  6. huggingface_hub/commands/delete_cache.py +13 -39
  7. huggingface_hub/commands/env.py +1 -3
  8. huggingface_hub/commands/huggingface_cli.py +1 -3
  9. huggingface_hub/commands/lfs.py +4 -8
  10. huggingface_hub/commands/scan_cache.py +5 -16
  11. huggingface_hub/commands/user.py +27 -45
  12. huggingface_hub/community.py +4 -4
  13. huggingface_hub/constants.py +22 -19
  14. huggingface_hub/fastai_utils.py +14 -23
  15. huggingface_hub/file_download.py +166 -108
  16. huggingface_hub/hf_api.py +500 -255
  17. huggingface_hub/hub_mixin.py +181 -176
  18. huggingface_hub/inference_api.py +4 -10
  19. huggingface_hub/keras_mixin.py +39 -71
  20. huggingface_hub/lfs.py +8 -24
  21. huggingface_hub/repocard.py +33 -48
  22. huggingface_hub/repocard_data.py +141 -30
  23. huggingface_hub/repository.py +41 -112
  24. huggingface_hub/templates/modelcard_template.md +39 -34
  25. huggingface_hub/utils/__init__.py +1 -0
  26. huggingface_hub/utils/_cache_assets.py +1 -4
  27. huggingface_hub/utils/_cache_manager.py +17 -39
  28. huggingface_hub/utils/_deprecation.py +8 -12
  29. huggingface_hub/utils/_errors.py +10 -57
  30. huggingface_hub/utils/_fixes.py +2 -6
  31. huggingface_hub/utils/_git_credential.py +5 -16
  32. huggingface_hub/utils/_headers.py +22 -11
  33. huggingface_hub/utils/_http.py +1 -4
  34. huggingface_hub/utils/_paths.py +5 -12
  35. huggingface_hub/utils/_runtime.py +2 -1
  36. huggingface_hub/utils/_telemetry.py +120 -0
  37. huggingface_hub/utils/_validators.py +5 -13
  38. huggingface_hub/utils/endpoint_helpers.py +1 -3
  39. huggingface_hub/utils/logging.py +10 -8
  40. {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/METADATA +7 -14
  41. huggingface_hub-0.13.0rc0.dist-info/RECORD +56 -0
  42. huggingface_hub/py.typed +0 -0
  43. huggingface_hub-0.12.1.dist-info/RECORD +0 -56
  44. {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/LICENSE +0 -0
  45. {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/WHEEL +0 -0
  46. {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/entry_points.txt +0 -0
  47. {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/hf_api.py CHANGED
@@ -13,19 +13,22 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  import json
- import os
+ import pprint
  import re
+ import textwrap
  import warnings
  from dataclasses import dataclass, field
+ from datetime import datetime
  from itertools import islice
  from pathlib import Path
  from typing import Any, BinaryIO, Dict, Iterable, Iterator, List, Optional, Tuple, Union
  from urllib.parse import quote

  import requests
- from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
  from requests.exceptions import HTTPError

+ from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
+
  from ._commit_api import (
  CommitOperation,
  CommitOperationAdd,
@@ -90,9 +93,18 @@ _REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$")
  logger = logging.get_logger(__name__)


- def repo_type_and_id_from_hf_id(
- hf_id: str, hub_url: Optional[str] = None
- ) -> Tuple[Optional[str], Optional[str], str]:
+ class ReprMixin:
+ """Mixin to create the __repr__ for a class"""
+
+ def __repr__(self):
+ formatted_value = pprint.pformat(self.__dict__, width=119, compact=True, sort_dicts=False)
+ if "\n" in formatted_value:
+ return f"{self.__class__.__name__}: {{ \n{textwrap.indent(formatted_value, '  ')}\n}}"
+ else:
+ return f"{self.__class__.__name__}: {formatted_value}"
+
+
+ def repo_type_and_id_from_hf_id(hf_id: str, hub_url: Optional[str] = None) -> Tuple[Optional[str], Optional[str], str]:
  """
  Returns the repo type and ID from a huggingface.co URL linking to a
  repository
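The new `ReprMixin` replaces the hand-written `__repr__` methods on `RepoFile`, `ModelInfo`, `DatasetInfo`, `SpaceInfo` and `MetricInfo` further down in this file. A minimal sketch of the resulting formatting (not part of the diff; `DummyInfo` is a hypothetical class used only for illustration):

```python
import pprint
import textwrap


class ReprMixin:
    """Same logic as the mixin added above: pretty-print the instance __dict__."""

    def __repr__(self):
        formatted_value = pprint.pformat(self.__dict__, width=119, compact=True, sort_dicts=False)
        if "\n" in formatted_value:
            return f"{self.__class__.__name__}: {{ \n{textwrap.indent(formatted_value, '  ')}\n}}"
        return f"{self.__class__.__name__}: {formatted_value}"


class DummyInfo(ReprMixin):
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)


print(DummyInfo(id="gpt2", private=False))
# DummyInfo: {'id': 'gpt2', 'private': False}
```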
@@ -139,6 +151,10 @@ def repo_type_and_id_from_hf_id(
  namespace = None
  if len(url_segments) > 2 and hub_url not in url_segments[-3]:
  repo_type = url_segments[-3]
+ elif namespace in REPO_TYPES_MAPPING:
+ # Mean canonical dataset or model
+ repo_type = REPO_TYPES_MAPPING[namespace]
+ namespace = None
  else:
  repo_type = None
  elif is_hf_id:
@@ -146,17 +162,21 @@ def repo_type_and_id_from_hf_id(
  # Passed <repo_type>/<user>/<model_id> or <repo_type>/<org>/<model_id>
  repo_type, namespace, repo_id = url_segments[-3:]
  elif len(url_segments) == 2:
- # Passed <user>/<model_id> or <org>/<model_id>
- namespace, repo_id = hf_id.split("/")[-2:]
- repo_type = None
+ if url_segments[0] in REPO_TYPES_MAPPING:
+ # Passed '<model_id>' or 'datasets/<dataset_id>' for a canonical model or dataset
+ repo_type = REPO_TYPES_MAPPING[url_segments[0]]
+ namespace = None
+ repo_id = hf_id.split("/")[-1]
+ else:
+ # Passed <user>/<model_id> or <org>/<model_id>
+ namespace, repo_id = hf_id.split("/")[-2:]
+ repo_type = None
  else:
  # Passed <model_id>
  repo_id = url_segments[0]
  namespace, repo_type = None, None
  else:
- raise ValueError(
- f"Unable to retrieve user and repo ID from the passed HF ID: {hf_id}"
- )
+ raise ValueError(f"Unable to retrieve user and repo ID from the passed HF ID: {hf_id}")

  # Check if repo type is known (mapping "spaces" => "space" + empty value => `None`)
  if repo_type in REPO_TYPES_MAPPING:
@@ -237,6 +257,7 @@ class RepoUrl(str):
  compatibility. At initialization, the URL is parsed to populate properties:
  - endpoint (`str`)
  - namespace (`Optional[str]`)
+ - repo_name (`str`)
  - repo_id (`str`)
  - repo_type (`Literal["model", "dataset", "space"]`)
  - url (`str`)
@@ -276,24 +297,20 @@ class RepoUrl(str):
  super().__init__()
  # Parse URL
  self.endpoint = endpoint or ENDPOINT
- repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(
- self, hub_url=self.endpoint
- )
+ repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(self, hub_url=self.endpoint)

  # Populate fields
  self.namespace = namespace
+ self.repo_name = repo_name
  self.repo_id = repo_name if namespace is None else f"{namespace}/{repo_name}"
  self.repo_type = repo_type or REPO_TYPE_MODEL
  self.url = str(self) # just in case it's needed

  def __repr__(self) -> str:
- return (
- f"RepoUrl('{self}', endpoint='{self.endpoint}',"
- f" repo_type='{self.repo_type}', repo_id='{self.repo_id}')"
- )
+ return f"RepoUrl('{self}', endpoint='{self.endpoint}', repo_type='{self.repo_type}', repo_id='{self.repo_id}')"


- class RepoFile:
+ class RepoFile(ReprMixin):
  """
  Data structure that represents a public file inside a repo, accessible from
  huggingface.co
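`repo_name` is the only new attribute on `RepoUrl`; it is what `duplicate_space` (added further down in this diff) uses to infer the destination repo name. A small sketch of the parsed attributes:

```python
from huggingface_hub.hf_api import RepoUrl

url = RepoUrl("https://huggingface.co/datasets/username/my-dataset")
print(url.repo_type)  # "dataset"
print(url.namespace)  # "username"
print(url.repo_name)  # "my-dataset"  <- new in this release
print(url.repo_id)    # "username/my-dataset"
```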
@@ -334,12 +351,8 @@ class RepoFile:
334
351
  for k, v in kwargs.items():
335
352
  setattr(self, k, v)
336
353
 
337
- def __repr__(self):
338
- items = (f"{k}='{v}'" for k, v in self.__dict__.items())
339
- return f"{self.__class__.__name__}({', '.join(items)})"
340
-
341
354
 
342
- class ModelInfo:
355
+ class ModelInfo(ReprMixin):
343
356
  """
344
357
  Info about a model accessible from huggingface.co
345
358
 
@@ -389,9 +402,7 @@ class ModelInfo:
389
402
  self.lastModified = lastModified
390
403
  self.tags = tags
391
404
  self.pipeline_tag = pipeline_tag
392
- self.siblings = (
393
- [RepoFile(**x) for x in siblings] if siblings is not None else []
394
- )
405
+ self.siblings = [RepoFile(**x) for x in siblings] if siblings is not None else []
395
406
  self.private = private
396
407
  self.author = author
397
408
  self.config = config
@@ -399,12 +410,6 @@ class ModelInfo:
399
410
  for k, v in kwargs.items():
400
411
  setattr(self, k, v)
401
412
 
402
- def __repr__(self):
403
- s = f"{self.__class__.__name__}:" + " {"
404
- for key, val in self.__dict__.items():
405
- s += f"\n\t{key}: {val}"
406
- return s + "\n}"
407
-
408
413
  def __str__(self):
409
414
  r = f"Model Name: {self.modelId}, Tags: {self.tags}"
410
415
  if self.pipeline_tag:
@@ -412,7 +417,7 @@ class ModelInfo:
412
417
  return r
413
418
 
414
419
 
415
- class DatasetInfo:
420
+ class DatasetInfo(ReprMixin):
416
421
  """
417
422
  Info about a dataset accessible from huggingface.co
418
423
 
@@ -423,7 +428,7 @@ class DatasetInfo:
423
428
  repo sha at this particular revision
424
429
  lastModified (`str`, *optional*):
425
430
  date of last commit to repo
426
- tags (`Listr[str]`, *optional*):
431
+ tags (`List[str]`, *optional*):
427
432
  List of tags.
428
433
  siblings (`List[RepoFile]`, *optional*):
429
434
  list of [`huggingface_hub.hf_api.RepoFile`] objects that constitute the dataset.
@@ -465,9 +470,7 @@ class DatasetInfo:
465
470
  self.description = description
466
471
  self.citation = citation
467
472
  self.cardData = cardData
468
- self.siblings = (
469
- [RepoFile(**x) for x in siblings] if siblings is not None else []
470
- )
473
+ self.siblings = [RepoFile(**x) for x in siblings] if siblings is not None else []
471
474
  # Legacy stuff, "key" is always returned with an empty string
472
475
  # because of old versions of the datasets lib that need this field
473
476
  kwargs.pop("key", None)
@@ -475,18 +478,12 @@ class DatasetInfo:
475
478
  for k, v in kwargs.items():
476
479
  setattr(self, k, v)
477
480
 
478
- def __repr__(self):
479
- s = f"{self.__class__.__name__}:" + " {"
480
- for key, val in self.__dict__.items():
481
- s += f"\n\t{key}: {val}"
482
- return s + "\n}"
483
-
484
481
  def __str__(self):
485
482
  r = f"Dataset Name: {self.id}, Tags: {self.tags}"
486
483
  return r
487
484
 
488
485
 
489
- class SpaceInfo:
486
+ class SpaceInfo(ReprMixin):
490
487
  """
491
488
  Info about a Space accessible from huggingface.co
492
489
 
@@ -524,22 +521,14 @@ class SpaceInfo:
524
521
  self.id = id
525
522
  self.sha = sha
526
523
  self.lastModified = lastModified
527
- self.siblings = (
528
- [RepoFile(**x) for x in siblings] if siblings is not None else []
529
- )
524
+ self.siblings = [RepoFile(**x) for x in siblings] if siblings is not None else []
530
525
  self.private = private
531
526
  self.author = author
532
527
  for k, v in kwargs.items():
533
528
  setattr(self, k, v)
534
529
 
535
- def __repr__(self):
536
- s = f"{self.__class__.__name__}:" + " {"
537
- for key, val in self.__dict__.items():
538
- s += f"\n\t{key}: {val}"
539
- return s + "\n}"
540
-
541
530
 
542
- class MetricInfo:
531
+ class MetricInfo(ReprMixin):
543
532
  """
544
533
  Info about a public metric accessible from huggingface.co
545
534
  """
@@ -562,12 +551,6 @@ class MetricInfo:
562
551
  for k, v in kwargs.items():
563
552
  setattr(self, k, v)
564
553
 
565
- def __repr__(self):
566
- s = f"{self.__class__.__name__}:" + " {"
567
- for key, val in self.__dict__.items():
568
- s += f"\n\t{key}: {val}"
569
- return s + "\n}"
570
-
571
554
  def __str__(self):
572
555
  r = f"Metric Name: {self.id}"
573
556
  return r
@@ -719,6 +702,49 @@ class GitRefs:
  tags: List[GitRefInfo]


+ @dataclass
+ class GitCommitInfo:
+ """
+ Contains information about a git commit for a repo on the Hub. Check out [`list_repo_commits`] for more details.
+
+ Args:
+ commit_id (`str`):
+ OID of the commit (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`)
+ authors (`List[str]`):
+ List of authors of the commit.
+ created_at (`datetime`):
+ Datetime when the commit was created.
+ title (`str`):
+ Title of the commit. This is a free-text value entered by the authors.
+ message (`str`):
+ Description of the commit. This is a free-text value entered by the authors.
+ formatted_title (`str`):
+ Title of the commit formatted as HTML. Only returned if `formatted=True` is set.
+ formatted_message (`str`):
+ Description of the commit formatted as HTML. Only returned if `formatted=True` is set.
+ """
+
+ commit_id: str
+
+ authors: List[str]
+ created_at: datetime
+ title: str
+ message: str
+
+ formatted_title: Optional[str]
+ formatted_message: Optional[str]
+
+ def __init__(self, data: Dict) -> None:
+ self.commit_id = data["id"]
+ self.authors = [author["user"] for author in data["authors"]]
+ self.created_at = parse_datetime(data["date"])
+ self.title = data["title"]
+ self.message = data["message"]
+
+ self.formatted_title = data.get("formatted", {}).get("title")
+ self.formatted_message = data.get("formatted", {}).get("message")
+
+
  @dataclass
  class UserLikes:
  """
@@ -1030,10 +1056,7 @@ class HfApi:
1030
1056
 
1031
1057
  if emissions_thresholds is not None:
1032
1058
  if cardData is None:
1033
- raise ValueError(
1034
- "`emissions_thresholds` were passed without setting"
1035
- " `cardData=True`."
1036
- )
1059
+ raise ValueError("`emissions_thresholds` were passed without setting `cardData=True`.")
1037
1060
  else:
1038
1061
  return _filter_emissions(items, *emissions_thresholds)
1039
1062
 
@@ -1058,11 +1081,7 @@ class HfApi:
1058
1081
 
1059
1082
  # Handling tasks
1060
1083
  if model_filter.task is not None:
1061
- filter_list.extend(
1062
- [model_filter.task]
1063
- if isinstance(model_filter.task, str)
1064
- else model_filter.task
1065
- )
1084
+ filter_list.extend([model_filter.task] if isinstance(model_filter.task, str) else model_filter.task)
1066
1085
 
1067
1086
  # Handling dataset
1068
1087
  if model_filter.trained_dataset is not None:
@@ -1076,18 +1095,12 @@ class HfApi:
1076
1095
  # Handling library
1077
1096
  if model_filter.library:
1078
1097
  filter_list.extend(
1079
- [model_filter.library]
1080
- if isinstance(model_filter.library, str)
1081
- else model_filter.library
1098
+ [model_filter.library] if isinstance(model_filter.library, str) else model_filter.library
1082
1099
  )
1083
1100
 
1084
1101
  # Handling tags
1085
1102
  if model_filter.tags:
1086
- tags.extend(
1087
- [model_filter.tags]
1088
- if isinstance(model_filter.tags, str)
1089
- else model_filter.tags
1090
- )
1103
+ tags.extend([model_filter.tags] if isinstance(model_filter.tags, str) else model_filter.tags)
1091
1104
 
1092
1105
  query_dict: Dict[str, Any] = {}
1093
1106
  if model_str is not None:
@@ -1520,8 +1533,7 @@ class HfApi:
1520
1533
  user = me["name"]
1521
1534
  else:
1522
1535
  raise ValueError(
1523
- "Cannot list liked repos. You must provide a 'user' as input or be"
1524
- " logged in as a user."
1536
+ "Cannot list liked repos. You must provide a 'user' as input or be logged in as a user."
1525
1537
  )
1526
1538
 
1527
1539
  path = f"{self.endpoint}/api/users/{user}/likes"
@@ -1540,21 +1552,9 @@ class HfApi:
1540
1552
  return UserLikes(
1541
1553
  user=user,
1542
1554
  total=len(likes),
1543
- models=[
1544
- like["repo"]["name"]
1545
- for like in likes
1546
- if like["repo"]["type"] == "model"
1547
- ],
1548
- datasets=[
1549
- like["repo"]["name"]
1550
- for like in likes
1551
- if like["repo"]["type"] == "dataset"
1552
- ],
1553
- spaces=[
1554
- like["repo"]["name"]
1555
- for like in likes
1556
- if like["repo"]["type"] == "space"
1557
- ],
1555
+ models=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "model"],
1556
+ datasets=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "dataset"],
1557
+ spaces=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "space"],
1558
1558
  )
1559
1559
 
1560
1560
  @validate_hf_hub_args
@@ -1613,9 +1613,7 @@ class HfApi:
1613
1613
  path = (
1614
1614
  f"{self.endpoint}/api/models/{repo_id}"
1615
1615
  if revision is None
1616
- else (
1617
- f"{self.endpoint}/api/models/{repo_id}/revision/{quote(revision, safe='')}"
1618
- )
1616
+ else (f"{self.endpoint}/api/models/{repo_id}/revision/{quote(revision, safe='')}")
1619
1617
  )
1620
1618
  params = {}
1621
1619
  if securityStatus:
@@ -1679,9 +1677,7 @@ class HfApi:
1679
1677
  path = (
1680
1678
  f"{self.endpoint}/api/datasets/{repo_id}"
1681
1679
  if revision is None
1682
- else (
1683
- f"{self.endpoint}/api/datasets/{repo_id}/revision/{quote(revision, safe='')}"
1684
- )
1680
+ else (f"{self.endpoint}/api/datasets/{repo_id}/revision/{quote(revision, safe='')}")
1685
1681
  )
1686
1682
  params = {}
1687
1683
  if files_metadata:
@@ -1744,9 +1740,7 @@ class HfApi:
1744
1740
  path = (
1745
1741
  f"{self.endpoint}/api/spaces/{repo_id}"
1746
1742
  if revision is None
1747
- else (
1748
- f"{self.endpoint}/api/spaces/{repo_id}/revision/{quote(revision, safe='')}"
1749
- )
1743
+ else (f"{self.endpoint}/api/spaces/{repo_id}/revision/{quote(revision, safe='')}")
1750
1744
  )
1751
1745
  params = {}
1752
1746
  if files_metadata:
@@ -1778,6 +1772,9 @@ class HfApi:
1778
1772
  revision (`str`, *optional*):
1779
1773
  The revision of the repository from which to get the
1780
1774
  information.
1775
+ repo_type (`str`, *optional*):
1776
+ Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
1777
+ `None` or `"model"` if getting repository info from a model. Default is `None`.
1781
1778
  timeout (`float`, *optional*):
1782
1779
  Whether to set a timeout for the request to the Hub.
1783
1780
  files_metadata (`bool`, *optional*):
@@ -1857,6 +1854,7 @@ class HfApi:
1857
1854
  Returns:
1858
1855
  `List[str]`: the list of files in a given repository.
1859
1856
  """
1857
+ # TODO: use https://huggingface.co/api/{repo_type}/{repo_id}/tree/{revision}/{subfolder}
1860
1858
  repo_info = self.repo_info(
1861
1859
  repo_id,
1862
1860
  revision=revision,
@@ -1927,6 +1925,84 @@ class HfApi:
1927
1925
  tags=[GitRefInfo(item) for item in data["tags"]],
1928
1926
  )
1929
1927
 
1928
+ @validate_hf_hub_args
1929
+ def list_repo_commits(
1930
+ self,
1931
+ repo_id: str,
1932
+ *,
1933
+ repo_type: Optional[str] = None,
1934
+ token: Optional[Union[bool, str]] = None,
1935
+ revision: Optional[str] = None,
1936
+ formatted: bool = False,
1937
+ ) -> List[GitCommitInfo]:
1938
+ """
1939
+ Get the list of commits of a given revision for a repo on the Hub.
1940
+
1941
+ Commits are sorted by date (last commit first).
1942
+
1943
+ Args:
1944
+ repo_id (`str`):
1945
+ A namespace (user or an organization) and a repo name separated by a `/`.
1946
+ repo_type (`str`, *optional*):
1947
+ Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if
1948
+ listing from a model. Default is `None`.
1949
+ token (`bool` or `str`, *optional*):
1950
+ A valid authentication token (see https://huggingface.co/settings/token).
1951
+ If `None` or `True` and machine is logged in (through `huggingface-cli login`
1952
+ or [`~huggingface_hub.login`]), token will be retrieved from the cache.
1953
+ If `False`, token is not sent in the request header.
1954
+ revision (`str`, *optional*):
1955
+ The git revision to commit from. Defaults to the head of the `"main"` branch.
1956
+ formatted (`bool`):
1957
+ Whether to return the HTML-formatted title and description of the commits. Defaults to False.
1958
+
1959
+ Example:
1960
+ ```py
1961
+ >>> from huggingface_hub import HfApi
1962
+ >>> api = HfApi()
1963
+
1964
+ # Commits are sorted by date (last commit first)
1965
+ >>> initial_commit = api.list_repo_commits("gpt2")[-1]
1966
+
1967
+ # Initial commit is always a system commit containing the `.gitattributes` file.
1968
+ >>> initial_commit
1969
+ GitCommitInfo(
1970
+ commit_id='9b865efde13a30c13e0a33e536cf3e4a5a9d71d8',
1971
+ authors=['system'],
1972
+ created_at=datetime.datetime(2019, 2, 18, 10, 36, 15, tzinfo=datetime.timezone.utc),
1973
+ title='initial commit',
1974
+ message='',
1975
+ formatted_title=None,
1976
+ formatted_message=None
1977
+ )
1978
+
1979
+ # Create an empty branch by deriving from initial commit
1980
+ >>> api.create_branch("gpt2", "new_empty_branch", revision=initial_commit.commit_id)
1981
+ ```
1982
+
1983
+ Returns:
1984
+ List[[`GitCommitInfo`]]: list of objects containing information about the commits for a repo on the Hub.
1985
+
1986
+ Raises:
1987
+ [`~utils.RepositoryNotFoundError`]:
1988
+ If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo
1989
+ does not exist.
1990
+ [`~utils.RevisionNotFoundError`]:
1991
+ If revision is not found (error 404) on the repo.
1992
+ """
1993
+ repo_type = repo_type or REPO_TYPE_MODEL
1994
+ revision = quote(revision, safe="") if revision is not None else DEFAULT_REVISION
1995
+
1996
+ # Paginate over results and return the list of commits.
1997
+ return [
1998
+ GitCommitInfo(item)
1999
+ for item in paginate(
2000
+ f"{self.endpoint}/api/{repo_type}s/{repo_id}/commits/{revision}",
2001
+ headers=self._build_hf_headers(token=token),
2002
+ params={"expand[]": "formatted"} if formatted else {},
2003
+ )
2004
+ ]
2005
+
1930
2006
  @validate_hf_hub_args
1931
2007
  def create_repo(
1932
2008
  self,
@@ -1981,23 +2057,17 @@ class HfApi:
1981
2057
  f" of {SPACES_SDK_TYPES} when repo_type is 'space'`"
1982
2058
  )
1983
2059
  if space_sdk not in SPACES_SDK_TYPES:
1984
- raise ValueError(
1985
- f"Invalid space_sdk. Please choose one of {SPACES_SDK_TYPES}."
1986
- )
2060
+ raise ValueError(f"Invalid space_sdk. Please choose one of {SPACES_SDK_TYPES}.")
1987
2061
  json["sdk"] = space_sdk
1988
2062
 
1989
2063
  if space_sdk is not None and repo_type != "space":
1990
- warnings.warn(
1991
- "Ignoring provided space_sdk because repo_type is not 'space'."
1992
- )
2064
+ warnings.warn("Ignoring provided space_sdk because repo_type is not 'space'.")
1993
2065
 
1994
2066
  if space_hardware is not None:
1995
2067
  if repo_type == "space":
1996
2068
  json["hardware"] = space_hardware
1997
2069
  else:
1998
- warnings.warn(
1999
- "Ignoring provided space_hardware because repo_type is not 'space'."
2000
- )
2070
+ warnings.warn("Ignoring provided space_hardware because repo_type is not 'space'.")
2001
2071
 
2002
2072
  if getattr(self, "_lfsmultipartthresh", None):
2003
2073
  # Testing purposes only.
@@ -2012,6 +2082,15 @@ class HfApi:
  if exist_ok and err.response.status_code == 409:
  # Repo already exists and `exist_ok=True`
  pass
+ elif exist_ok and err.response.status_code == 403:
+ # No write permission on the namespace but repo might already exist
+ try:
+ self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token)
+ if repo_type is None or repo_type == REPO_TYPE_MODEL:
+ return RepoUrl(f"{self.endpoint}/{repo_id}")
+ return RepoUrl(f"{self.endpoint}/{repo_type}/{repo_id}")
+ except HfHubHTTPError:
+ raise
  else:
  raise

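The new 403 branch means `create_repo(..., exist_ok=True)` no longer fails when the token lacks write access to a namespace but the repo itself already exists (e.g. a shared organization repo). A hedged sketch; `my-org/existing-repo` is a placeholder and the fallback still requires read access for `repo_info` to succeed:

```python
from huggingface_hub import HfApi

api = HfApi()
# Previously this raised 403 Forbidden if the token had no write permission on `my-org`,
# even though the repo was already there. Now the existing repo's URL is returned instead.
url = api.create_repo("my-org/existing-repo", exist_ok=True)
print(url.repo_id)  # "my-org/existing-repo"
```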
@@ -2164,16 +2243,10 @@ class HfApi:
2164
2243
  </Tip>
2165
2244
  """
2166
2245
  if len(from_id.split("/")) != 2:
2167
- raise ValueError(
2168
- f"Invalid repo_id: {from_id}. It should have a namespace"
2169
- " (:namespace:/:repo_name:)"
2170
- )
2246
+ raise ValueError(f"Invalid repo_id: {from_id}. It should have a namespace (:namespace:/:repo_name:)")
2171
2247
 
2172
2248
  if len(to_id.split("/")) != 2:
2173
- raise ValueError(
2174
- f"Invalid repo_id: {to_id}. It should have a namespace"
2175
- " (:namespace:/:repo_name:)"
2176
- )
2249
+ raise ValueError(f"Invalid repo_id: {to_id}. It should have a namespace (:namespace:/:repo_name:)")
2177
2250
 
2178
2251
  if repo_type is None:
2179
2252
  repo_type = REPO_TYPE_MODEL # Hub won't accept `None`.
@@ -2298,22 +2371,17 @@ class HfApi:
2298
2371
 
2299
2372
  if parent_commit is not None and not REGEX_COMMIT_OID.fullmatch(parent_commit):
2300
2373
  raise ValueError(
2301
- "`parent_commit` is not a valid commit OID. It must match the"
2302
- f" following regex: {REGEX_COMMIT_OID}"
2374
+ f"`parent_commit` is not a valid commit OID. It must match the following regex: {REGEX_COMMIT_OID}"
2303
2375
  )
2304
2376
 
2305
2377
  if commit_message is None or len(commit_message) == 0:
2306
2378
  raise ValueError("`commit_message` can't be empty, please pass a value.")
2307
2379
 
2308
- commit_description = (
2309
- commit_description if commit_description is not None else ""
2310
- )
2380
+ commit_description = commit_description if commit_description is not None else ""
2311
2381
  repo_type = repo_type if repo_type is not None else REPO_TYPE_MODEL
2312
2382
  if repo_type not in REPO_TYPES:
2313
2383
  raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}")
2314
- revision = (
2315
- quote(revision, safe="") if revision is not None else DEFAULT_REVISION
2316
- )
2384
+ revision = quote(revision, safe="") if revision is not None else DEFAULT_REVISION
2317
2385
  create_pr = create_pr if create_pr is not None else False
2318
2386
 
2319
2387
  operations = list(operations)
@@ -2321,10 +2389,7 @@ class HfApi:
2321
2389
  nb_additions = len(additions)
2322
2390
  nb_deletions = len(operations) - nb_additions
2323
2391
 
2324
- logger.debug(
2325
- f"About to commit to the hub: {len(additions)} addition(s) and"
2326
- f" {nb_deletions} deletion(s)."
2327
- )
2392
+ logger.debug(f"About to commit to the hub: {len(additions)} addition(s) and {nb_deletions} deletion(s).")
2328
2393
 
2329
2394
  # If updating twice the same file or update then delete a file in a single commit
2330
2395
  warn_on_overwriting_operations(operations)
@@ -2344,11 +2409,7 @@ class HfApi:
2344
2409
  raise
2345
2410
 
2346
2411
  upload_lfs_files(
2347
- additions=[
2348
- addition
2349
- for addition in additions
2350
- if upload_modes[addition.path_in_repo] == "lfs"
2351
- ],
2412
+ additions=[addition for addition in additions if upload_modes[addition.path_in_repo] == "lfs"],
2352
2413
  repo_type=repo_type,
2353
2414
  repo_id=repo_id,
2354
2415
  token=token or self.token,
@@ -2374,14 +2435,11 @@ class HfApi:
2374
2435
  "Content-Type": "application/x-ndjson",
2375
2436
  **self._build_hf_headers(token=token, is_write_action=True),
2376
2437
  }
2438
+ data = b"".join(_payload_as_ndjson())
2439
+ params = {"create_pr": "1"} if create_pr else None
2377
2440
 
2378
2441
  try:
2379
- commit_resp = requests.post(
2380
- url=commit_url,
2381
- headers=headers,
2382
- data=_payload_as_ndjson(), # type: ignore
2383
- params={"create_pr": "1"} if create_pr else None,
2384
- )
2442
+ commit_resp = requests.post(url=commit_url, headers=headers, data=data, params=params)
2385
2443
  hf_raise_for_status(commit_resp, endpoint_name="commit")
2386
2444
  except RepositoryNotFoundError as e:
2387
2445
  e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
@@ -2525,9 +2583,7 @@ class HfApi:
2525
2583
  raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}")
2526
2584
 
2527
2585
  commit_message = (
2528
- commit_message
2529
- if commit_message is not None
2530
- else f"Upload {path_in_repo} with huggingface_hub"
2586
+ commit_message if commit_message is not None else f"Upload {path_in_repo} with huggingface_hub"
2531
2587
  )
2532
2588
  operation = CommitOperationAdd(
2533
2589
  path_or_fileobj=path_or_fileobj,
@@ -2570,21 +2626,25 @@ class HfApi:
  parent_commit: Optional[str] = None,
  allow_patterns: Optional[Union[List[str], str]] = None,
  ignore_patterns: Optional[Union[List[str], str]] = None,
+ delete_patterns: Optional[Union[List[str], str]] = None,
  ):
  """
- Upload a local folder to the given repo. The upload is done
- through a HTTP requests, and doesn't require git or git-lfs to be
- installed.
+ Upload a local folder to the given repo. The upload is done through a HTTP request and doesn't require git or
+ git-lfs to be installed.
+
+ The structure of the folder will be preserved. Files with the same name already present in the repository will
+ be overwritten. Others will be left untouched.

- The structure of the folder will be preserved. Files with the same name
- already present in the repository will be overwritten, others will be left untouched.
+ Use the `allow_patterns` and `ignore_patterns` arguments to specify which files to upload. These parameters
+ accept either a single pattern or a list of patterns. Patterns are Standard Wildcards (globbing patterns) as
+ documented [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm). If both `allow_patterns` and
+ `ignore_patterns` are provided, both constraints apply. By default, all files from the folder are uploaded.

- Use the `allow_patterns` and `ignore_patterns` arguments to specify which files
- to upload. These parameters accept either a single pattern or a list of
- patterns. Patterns are Standard Wildcards (globbing patterns) as documented
- [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm). If both
- `allow_patterns` and `ignore_patterns` are provided, both constraints apply. By
- default, all files from the folder are uploaded.
+ Use the `delete_patterns` argument to specify remote files you want to delete. Input type is the same as for
+ `allow_patterns` (see above). If `path_in_repo` is also provided, the patterns are matched against paths
+ relative to this folder. For example, `upload_folder(..., path_in_repo="experiment", delete_patterns="logs/*")`
+ will delete any remote file under `experiment/logs/`. Note that the `.gitattributes` file will not be deleted
+ even if it matches the patterns.

  Uses `HfApi.create_commit` under the hood.

@@ -2627,6 +2687,10 @@ class HfApi:
  If provided, only files matching at least one pattern are uploaded.
  ignore_patterns (`List[str]` or `str`, *optional*):
  If provided, files matching any of the patterns are not uploaded.
+ delete_patterns (`List[str]` or `str`, *optional*):
+ If provided, remote files matching any of the patterns will be deleted from the repo while committing
+ new files. This is useful if you don't know which files have already been uploaded.
+ Note: to avoid discrepancies the `.gitattributes` file is not deleted even if it matches the pattern.

  Returns:
  `str`: A URL to visualize the uploaded folder on the hub
@@ -2644,16 +2708,16 @@ class HfApi:

  <Tip warning={true}>

- `upload_folder` assumes that the repo already exists on the Hub. If you get a
- Client error 404, please make sure you are authenticated and that `repo_id` and
- `repo_type` are set correctly. If repo does not exist, create it first using
- [`~hf_api.create_repo`].
+ `upload_folder` assumes that the repo already exists on the Hub. If you get a Client error 404, please make
+ sure you are authenticated and that `repo_id` and `repo_type` are set correctly. If repo does not exist, create
+ it first using [`~hf_api.create_repo`].

  </Tip>

  Example:

  ```python
+ # Upload checkpoints folder except the log files
  >>> upload_folder(
  ... folder_path="local/checkpoints",
  ... path_in_repo="remote/experiment/checkpoints",
@@ -2664,6 +2728,19 @@ class HfApi:
  ... )
  # "https://huggingface.co/datasets/username/my-dataset/tree/main/remote/experiment/checkpoints"

+ # Upload checkpoints folder including logs while deleting existing logs from the repo
+ # Useful if you don't know exactly which log files have already being pushed
+ >>> upload_folder(
+ ... folder_path="local/checkpoints",
+ ... path_in_repo="remote/experiment/checkpoints",
+ ... repo_id="username/my-dataset",
+ ... repo_type="datasets",
+ ... token="my_token",
+ ... delete_patterns="**/logs/*.txt",
+ ... )
+ "https://huggingface.co/datasets/username/my-dataset/tree/main/remote/experiment/checkpoints"
+
+ # Upload checkpoints folder while creating a PR
  >>> upload_folder(
  ... folder_path="local/checkpoints",
  ... path_in_repo="remote/experiment/checkpoints",
@@ -2684,22 +2761,36 @@ class HfApi:
  path_in_repo = ""

  commit_message = (
- commit_message
- if commit_message is not None
- else f"Upload {path_in_repo} with huggingface_hub"
+ commit_message if commit_message is not None else f"Upload {path_in_repo} with huggingface_hub"
  )

- files_to_add = _prepare_upload_folder_commit(
+ delete_operations = self._prepare_upload_folder_deletions(
+ repo_id=repo_id,
+ repo_type=repo_type,
+ revision=DEFAULT_REVISION if create_pr else revision,
+ token=token,
+ path_in_repo=path_in_repo,
+ delete_patterns=delete_patterns,
+ )
+ add_operations = _prepare_upload_folder_additions(
  folder_path,
  path_in_repo,
  allow_patterns=allow_patterns,
  ignore_patterns=ignore_patterns,
  )

+ # Optimize operations: if some files will be overwritten, we don't need to delete them first
+ if len(add_operations) > 0:
+ added_paths = set(op.path_in_repo for op in add_operations)
+ delete_operations = [
+ delete_op for delete_op in delete_operations if delete_op.path_in_repo not in added_paths
+ ]
+ commit_operations = delete_operations + add_operations
+
  commit_info = self.create_commit(
  repo_type=repo_type,
  repo_id=repo_id,
- operations=files_to_add,
+ operations=commit_operations,
  commit_message=commit_message,
  commit_description=commit_description,
  token=token,
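Because overwritten files are filtered out of the delete operations (the optimization above), combining `delete_patterns` with a regular upload gives a mirror-style sync: files that are re-uploaded are simply overwritten, and stale remote files matching the pattern are removed. A sketch with placeholder repo and paths; the patterns are fnmatch-style, so `"*"` also matches nested paths:

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="local/checkpoints",
    path_in_repo="checkpoints",
    repo_id="username/my-model",
    delete_patterns="*",  # remove remote files under checkpoints/ that are not part of this upload
    commit_message="Sync checkpoints folder",
)
```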
@@ -2787,9 +2878,7 @@ class HfApi:
2787
2878
 
2788
2879
  """
2789
2880
  commit_message = (
2790
- commit_message
2791
- if commit_message is not None
2792
- else f"Delete {path_in_repo} with huggingface_hub"
2881
+ commit_message if commit_message is not None else f"Delete {path_in_repo} with huggingface_hub"
2793
2882
  )
2794
2883
 
2795
2884
  operations = [CommitOperationDelete(path_in_repo=path_in_repo)]
@@ -2861,14 +2950,10 @@ class HfApi:
2861
2950
  repo_id=repo_id,
2862
2951
  repo_type=repo_type,
2863
2952
  token=token,
2864
- operations=[
2865
- CommitOperationDelete(path_in_repo=path_in_repo, is_folder=True)
2866
- ],
2953
+ operations=[CommitOperationDelete(path_in_repo=path_in_repo, is_folder=True)],
2867
2954
  revision=revision,
2868
2955
  commit_message=(
2869
- commit_message
2870
- if commit_message is not None
2871
- else f"Delete folder {path_in_repo} with huggingface_hub"
2956
+ commit_message if commit_message is not None else f"Delete folder {path_in_repo} with huggingface_hub"
2872
2957
  ),
2873
2958
  commit_description=commit_description,
2874
2959
  create_pr=create_pr,
@@ -2887,7 +2972,8 @@ class HfApi:
2887
2972
  exist_ok: bool = False,
2888
2973
  ) -> None:
2889
2974
  """
2890
- Create a new branch from `main` on a repo on the Hub.
2975
+ Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`).
2976
+ To find a revision suiting your needs, you can use [`list_repo_refs`] or [`list_repo_commits`].
2891
2977
 
2892
2978
  Args:
2893
2979
  repo_id (`str`):
@@ -3044,9 +3130,7 @@ class HfApi:
3044
3130
  """
3045
3131
  if repo_type is None:
3046
3132
  repo_type = REPO_TYPE_MODEL
3047
- revision = (
3048
- quote(revision, safe="") if revision is not None else DEFAULT_REVISION
3049
- )
3133
+ revision = quote(revision, safe="") if revision is not None else DEFAULT_REVISION
3050
3134
 
3051
3135
  # Prepare request
3052
3136
  tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{revision}"
@@ -3196,9 +3280,7 @@ class HfApi:
3196
3280
  headers = self._build_hf_headers(token=token)
3197
3281
 
3198
3282
  def _fetch_discussion_page(page_index: int):
3199
- path = (
3200
- f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions?p={page_index}"
3201
- )
3283
+ path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions?p={page_index}"
3202
3284
  resp = requests.get(path, headers=headers)
3203
3285
  hf_raise_for_status(resp)
3204
3286
  paginated_discussions = resp.json()
@@ -3272,9 +3354,7 @@ class HfApi:
3272
3354
  if repo_type is None:
3273
3355
  repo_type = REPO_TYPE_MODEL
3274
3356
 
3275
- path = (
3276
- f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions/{discussion_num}"
3277
- )
3357
+ path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions/{discussion_num}"
3278
3358
  headers = self._build_hf_headers(token=token)
3279
3359
  resp = requests.get(path, params={"diff": "1"}, headers=headers)
3280
3360
  hf_raise_for_status(resp)
@@ -3282,17 +3362,9 @@ class HfApi:
3282
3362
  discussion_details = resp.json()
3283
3363
  is_pull_request = discussion_details["isPullRequest"]
3284
3364
 
3285
- target_branch = (
3286
- discussion_details["changes"]["base"] if is_pull_request else None
3287
- )
3288
- conflicting_files = (
3289
- discussion_details["filesWithConflicts"] if is_pull_request else None
3290
- )
3291
- merge_commit_oid = (
3292
- discussion_details["changes"].get("mergeCommitId", None)
3293
- if is_pull_request
3294
- else None
3295
- )
3365
+ target_branch = discussion_details["changes"]["base"] if is_pull_request else None
3366
+ conflicting_files = discussion_details["filesWithConflicts"] if is_pull_request else None
3367
+ merge_commit_oid = discussion_details["changes"].get("mergeCommitId", None) if is_pull_request else None
3296
3368
 
3297
3369
  return DiscussionWithDetails(
3298
3370
  title=discussion_details["title"],
@@ -3413,7 +3485,7 @@ class HfApi:
3413
3485
 
3414
3486
  Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`];
3415
3487
 
3416
- This is a wrapper around [`HfApi.create_discusssion`].
3488
+ This is a wrapper around [`HfApi.create_discussion`].
3417
3489
 
3418
3490
  Args:
3419
3491
  repo_id (`str`):
@@ -3860,8 +3932,7 @@ class HfApi:
3860
3932
  </Tip>
3861
3933
  """
3862
3934
  warnings.warn(
3863
- "Hidden comments' content cannot be retrieved anymore. Hiding a comment is"
3864
- " irreversible.",
3935
+ "Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible.",
3865
3936
  UserWarning,
3866
3937
  )
3867
3938
  resp = self._post_discussion_changes(
@@ -3874,9 +3945,7 @@ class HfApi:
3874
3945
  return deserialize_event(resp.json()["updatedComment"]) # type: ignore
3875
3946
 
3876
3947
  @validate_hf_hub_args
3877
- def add_space_secret(
3878
- self, repo_id: str, key: str, value: str, *, token: Optional[str] = None
3879
- ) -> None:
3948
+ def add_space_secret(self, repo_id: str, key: str, value: str, *, token: Optional[str] = None) -> None:
3880
3949
  """Adds or updates a secret in a Space.
3881
3950
 
3882
3951
  Secrets allow to set secret keys or tokens to a Space without hardcoding them.
@@ -3900,9 +3969,7 @@ class HfApi:
3900
3969
  hf_raise_for_status(r)
3901
3970
 
3902
3971
  @validate_hf_hub_args
3903
- def delete_space_secret(
3904
- self, repo_id: str, key: str, *, token: Optional[str] = None
3905
- ) -> None:
3972
+ def delete_space_secret(self, repo_id: str, key: str, *, token: Optional[str] = None) -> None:
3906
3973
  """Deletes a secret from a Space.
3907
3974
 
3908
3975
  Secrets allow to set secret keys or tokens to a Space without hardcoding them.
@@ -3924,9 +3991,7 @@ class HfApi:
3924
3991
  hf_raise_for_status(r)
3925
3992
 
3926
3993
  @validate_hf_hub_args
3927
- def get_space_runtime(
3928
- self, repo_id: str, *, token: Optional[str] = None
3929
- ) -> SpaceRuntime:
3994
+ def get_space_runtime(self, repo_id: str, *, token: Optional[str] = None) -> SpaceRuntime:
3930
3995
  """Gets runtime information about a Space.
3931
3996
 
3932
3997
  Args:
@@ -3936,26 +4001,14 @@ class HfApi:
3936
4001
  Hugging Face token. Will default to the locally saved token if
3937
4002
  not provided.
3938
4003
  Returns:
3939
- `SpaceRuntime`: dataclass containing runtime information about a Space
3940
- including Space stage and hardware.
4004
+ [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware.
3941
4005
  """
3942
- r = requests.get(
3943
- f"{self.endpoint}/api/spaces/{repo_id}/runtime",
3944
- headers=self._build_hf_headers(token=token),
3945
- )
4006
+ r = requests.get(f"{self.endpoint}/api/spaces/{repo_id}/runtime", headers=self._build_hf_headers(token=token))
3946
4007
  hf_raise_for_status(r)
3947
- data = r.json()
3948
- return SpaceRuntime(
3949
- stage=data["stage"],
3950
- hardware=data["hardware"]["current"],
3951
- requested_hardware=data["hardware"]["requested"],
3952
- raw=data,
3953
- )
4008
+ return SpaceRuntime(r.json())
3954
4009
 
3955
4010
  @validate_hf_hub_args
3956
- def request_space_hardware(
3957
- self, repo_id: str, hardware: SpaceHardware, *, token: Optional[str] = None
3958
- ) -> None:
4011
+ def request_space_hardware(self, repo_id: str, hardware: SpaceHardware, *, token: Optional[str] = None) -> None:
3959
4012
  """Request new hardware for a Space.
3960
4013
 
3961
4014
  Args:
@@ -3968,8 +4021,7 @@ class HfApi:
3968
4021
 
3969
4022
  <Tip>
3970
4023
 
3971
- It is also possible to request hardware directly when creating the Space repo!
3972
- See [`create_repo`] for details.
4024
+ It is also possible to request hardware directly when creating the Space repo! See [`create_repo`] for details.
3973
4025
 
3974
4026
  </Tip>
3975
4027
  """
@@ -3980,6 +4032,161 @@ class HfApi:
3980
4032
  )
3981
4033
  hf_raise_for_status(r)
3982
4034
 
4035
+ @validate_hf_hub_args
4036
+ def pause_space(self, repo_id: str, *, token: Optional[str] = None) -> SpaceRuntime:
4037
+ """Pause your Space.
4038
+
4039
+ A paused Space stops executing until manually restarted by its owner. This is different from the sleeping
4040
+ state in which free Spaces go after 72h of inactivity. Paused time is not billed to your account, no matter the
4041
+ hardware you've selected. To restart your Space, use [`restart_space`] and go to your Space settings page.
4042
+
4043
+ For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause).
4044
+
4045
+ Args:
4046
+ repo_id (`str`):
4047
+ ID of the Space to pause. Example: `"Salesforce/BLIP2"`.
4048
+ token (`str`, *optional*):
4049
+ Hugging Face token. Will default to the locally saved token if not provided.
4050
+
4051
+ Returns:
4052
+ [`SpaceRuntime`]: Runtime information about your Space including `stage=PAUSED` and requested hardware.
4053
+
4054
+ Raises:
4055
+ [`~utils.RepositoryNotFoundError`]:
4056
+ If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
4057
+ are not authenticated.
4058
+ [`~utils.HfHubHTTPError`]:
4059
+ 403 Forbidden: only the owner of a Space can pause it. If you want to manage a Space that you don't
4060
+ own, either ask the owner by opening a Discussion or duplicate the Space.
4061
+ [`~utils.BadRequestError`]:
4062
+ If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
4063
+ a static Space, you can set it to private.
4064
+ """
4065
+ r = requests.post(f"{self.endpoint}/api/spaces/{repo_id}/pause", headers=self._build_hf_headers(token=token))
4066
+ hf_raise_for_status(r)
4067
+ return SpaceRuntime(r.json())
4068
+
4069
+ @validate_hf_hub_args
4070
+ def restart_space(self, repo_id: str, *, token: Optional[str] = None) -> SpaceRuntime:
4071
+ """Restart your Space.
4072
+
4073
+ This is the only way to programmatically restart a Space if you've put it on Pause (see [`pause_space`]). You
4074
+ must be the owner of the Space to restart it. If you are using an upgraded hardware, your account will be
4075
+ billed as soon as the Space is restarted. You can trigger a restart no matter the current state of a Space.
4076
+
4077
+ For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause).
4078
+
4079
+ Args:
4080
+ repo_id (`str`):
4081
+ ID of the Space to restart. Example: `"Salesforce/BLIP2"`.
4082
+ token (`str`, *optional*):
4083
+ Hugging Face token. Will default to the locally saved token if not provided.
4084
+
4085
+ Returns:
4086
+ [`SpaceRuntime`]: Runtime information about your Space.
4087
+
4088
+ Raises:
4089
+ [`~utils.RepositoryNotFoundError`]:
4090
+ If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you
4091
+ are not authenticated.
4092
+ [`~utils.HfHubHTTPError`]:
4093
+ 403 Forbidden: only the owner of a Space can restart it. If you want to restart a Space that you don't
4094
+ own, either ask the owner by opening a Discussion or duplicate the Space.
4095
+ [`~utils.BadRequestError`]:
4096
+ If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide
4097
+ a static Space, you can set it to private.
4098
+ """
4099
+ r = requests.post(f"{self.endpoint}/api/spaces/{repo_id}/restart", headers=self._build_hf_headers(token=token))
4100
+ hf_raise_for_status(r)
4101
+ return SpaceRuntime(r.json())
4102
+
4103
+ @validate_hf_hub_args
4104
+ def duplicate_space(
4105
+ self,
4106
+ from_id: str,
4107
+ to_id: Optional[str] = None,
4108
+ *,
4109
+ private: Optional[bool] = None,
4110
+ token: Optional[str] = None,
4111
+ exist_ok: bool = False,
4112
+ ) -> str:
4113
+ """Duplicate a Space.
4114
+
4115
+ Programmatically duplicate a Space. The new Space will be created in your account and will be in the same state
4116
+ as the original Space (running or paused). You can duplicate a Space no matter the current state of a Space.
4117
+
4118
+ Args:
4119
+ from_id (`str`):
4120
+ ID of the Space to duplicate. Example: `"pharma/CLIP-Interrogator"`.
4121
+ to_id (`str`, *optional*):
4122
+ ID of the new Space. Example: `"dog/CLIP-Interrogator"`. If not provided, the new Space will have the same
4123
+ name as the original Space, but in your account.
4124
+ private (`bool`, *optional*):
4125
+ Whether the new Space should be private or not. Defaults to the same privacy as the original Space.
4126
+ token (`str`, *optional*):
4127
+ Hugging Face token. Will default to the locally saved token if not provided.
4128
+ exist_ok (`bool`, *optional*, defaults to `False`):
4129
+ If `True`, do not raise an error if repo already exists.
4130
+
4131
+ Returns:
4132
+ [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing
4133
+ attributes like `endpoint`, `repo_type` and `repo_id`.
4134
+
4135
+ Raises:
4136
+ - [`HTTPError`](https://2.python-requests.org/en/master/api/#requests.HTTPError)
4137
+ if the HuggingFace API returned an error
4138
+ - [`~utils.RepositoryNotFoundError`]
4139
+ If one of `from_id` or `to_id` cannot be found. This may be because it doesn't exist,
4140
+ or because it is set to `private` and you do not have access.
4141
+
4142
+ Example:
4143
+ ```python
4144
+ >>> from huggingface_hub import duplicate_space
4145
+
4146
+ # Duplicate a Space to your account
4147
+ >>> duplicate_space("multimodalart/dreambooth-training")
4148
+ RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...)
4149
+
4150
+ # Can set custom destination id and visibility flag.
4151
+ >>> duplicate_space("multimodalart/dreambooth-training", to_id="my-dreambooth", private=True)
4152
+ RepoUrl('https://huggingface.co/spaces/nateraw/my-dreambooth',...)
4153
+ ```
4154
+ """
4155
+ # Parse to_id if provided
4156
+ parsed_to_id = RepoUrl(to_id) if to_id is not None else None
4157
+
4158
+ # Infer target repo_id
4159
+ to_namespace = ( # set namespace manually or default to username
4160
+ parsed_to_id.namespace
4161
+ if parsed_to_id is not None and parsed_to_id.namespace is not None
4162
+ else self.whoami(token)["name"]
4163
+ )
4164
+ to_repo_name = parsed_to_id.repo_name if to_id is not None else RepoUrl(from_id).repo_name # type: ignore
4165
+
4166
+ # repository must be a valid repo_id (namespace/repo_name).
4167
+ payload: Dict[str, Any] = {"repository": f"{to_namespace}/{to_repo_name}"}
4168
+
4169
+ # private is optional with this endpoint, with None defaulting to the original space's privacy.
4170
+ if private is not None:
4171
+ payload["private"] = private
4172
+
4173
+ r = requests.post(
4174
+ f"{self.endpoint}/api/spaces/{from_id}/duplicate",
4175
+ headers=self._build_hf_headers(token=token, is_write_action=True),
4176
+ json=payload,
4177
+ )
4178
+
4179
+ try:
4180
+ hf_raise_for_status(r)
4181
+ except HTTPError as err:
4182
+ if exist_ok and err.response.status_code == 409:
4183
+ # Repo already exists and `exist_ok=True`
4184
+ pass
4185
+ else:
4186
+ raise
4187
+
4188
+ return RepoUrl(r.json()["url"], endpoint=self.endpoint)
4189
+
3983
4190
  def _build_hf_headers(
3984
4191
  self,
3985
4192
  token: Optional[Union[bool, str]] = None,
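A short round-trip sketch for the new `pause_space` / `restart_space` methods defined above (`username/my-space` is a placeholder; you must own the Space):

```python
from huggingface_hub import HfApi

api = HfApi()

runtime = api.pause_space("username/my-space")
print(runtime.stage)  # expected: "PAUSED"

runtime = api.restart_space("username/my-space")
print(runtime.stage)  # e.g. "BUILDING" while the Space comes back up
```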
@@ -4003,8 +4210,48 @@ class HfApi:
4003
4210
  user_agent=user_agent or self.user_agent,
4004
4211
  )
4005
4212
 
4213
+ def _prepare_upload_folder_deletions(
4214
+ self,
4215
+ repo_id: str,
4216
+ repo_type: Optional[str],
4217
+ revision: Optional[str],
4218
+ token: Optional[str],
4219
+ path_in_repo: str,
4220
+ delete_patterns: Optional[Union[List[str], str]],
4221
+ ) -> List[CommitOperationDelete]:
4222
+ """Generate the list of Delete operations for a commit to delete files from a repo.
4006
4223
 
4007
- def _prepare_upload_folder_commit(
4224
+ List remote files and match them against the `delete_patterns` constraints. Returns a list of [`CommitOperationDelete`]
4225
+ with the matching items.
4226
+
4227
+ Note: `.gitattributes` file is essential to make a repo work properly on the Hub. This file will always be
4228
+ kept even if it matches the `delete_patterns` constraints.
4229
+ """
4230
+ if delete_patterns is None:
4231
+ # If no delete patterns, no need to list and filter remote files
4232
+ return []
4233
+
4234
+ # List remote files
4235
+ filenames = self.list_repo_files(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token)
4236
+
4237
+ # Compute relative path in repo
4238
+ if path_in_repo:
4239
+ path_in_repo = path_in_repo.strip("/") + "/" # harmonize
4240
+ relpath_to_abspath = {
4241
+ file[len(path_in_repo) :]: file for file in filenames if file.startswith(path_in_repo)
4242
+ }
4243
+ else:
4244
+ relpath_to_abspath = {file: file for file in filenames}
4245
+
4246
+ # Apply filter on relative paths and return
4247
+ return [
4248
+ CommitOperationDelete(path_in_repo=relpath_to_abspath[relpath], is_folder=False)
4249
+ for relpath in filter_repo_objects(relpath_to_abspath.keys(), allow_patterns=delete_patterns)
4250
+ if relpath_to_abspath[relpath] != ".gitattributes"
4251
+ ]
4252
+
4253
+
4254
+ def _prepare_upload_folder_additions(
4008
4255
  folder_path: Union[str, Path],
4009
4256
  path_in_repo: str,
4010
4257
  allow_patterns: Optional[Union[List[str], str]] = None,
@@ -4015,32 +4262,29 @@ def _prepare_upload_folder_commit(
  Files not matching the `allow_patterns` (allowlist) and `ignore_patterns` (denylist)
  constraints are discarded.
  """
- folder_path = os.path.normpath(os.path.expanduser(folder_path))
- if not os.path.isdir(folder_path):
+ folder_path = Path(folder_path).expanduser().resolve()
+ if not folder_path.is_dir():
  raise ValueError(f"Provided path: '{folder_path}' is not a directory")

- files_to_add: List[CommitOperationAdd] = []
- for dirpath, _, filenames in os.walk(folder_path):
- for filename in filenames:
- abs_path = os.path.join(dirpath, filename)
- rel_path = os.path.relpath(abs_path, folder_path)
- files_to_add.append(
- CommitOperationAdd(
- path_or_fileobj=abs_path,
- path_in_repo=os.path.normpath(
- os.path.join(path_in_repo, rel_path)
- ).replace(os.sep, "/"),
- )
- )
-
- return list(
- filter_repo_objects(
- files_to_add,
- allow_patterns=allow_patterns,
- ignore_patterns=ignore_patterns,
- key=lambda x: x.path_in_repo,
+ # List files from folder
+ relpath_to_abspath = {
+ path.relative_to(folder_path).as_posix(): path
+ for path in sorted(folder_path.glob("**/*")) # sorted to be deterministic
+ if path.is_file()
+ }
+
+ # Filter files and return
+ # Patterns are applied on the path relative to `folder_path`. `path_in_repo` is prefixed after the filtering.
+ prefix = f"{path_in_repo.strip('/')}/" if path_in_repo else ""
+ return [
+ CommitOperationAdd(
+ path_or_fileobj=relpath_to_abspath[relpath], # absolute path on disk
+ path_in_repo=prefix + relpath, # "absolute" path in repo
  )
- )
+ for relpath in filter_repo_objects(
+ relpath_to_abspath.keys(), allow_patterns=allow_patterns, ignore_patterns=ignore_patterns
+ )
+ ]


  def _parse_revision_from_pr_url(pr_url: str) -> str:
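A standalone sketch of the new listing logic above, showing how local files map to repo paths once the `path_in_repo` prefix is applied (paths are placeholders):

```python
from pathlib import Path

folder_path = Path("local/checkpoints").expanduser().resolve()
relpath_to_abspath = {
    path.relative_to(folder_path).as_posix(): path
    for path in sorted(folder_path.glob("**/*"))  # sorted => deterministic commits
    if path.is_file()
}
prefix = "remote/experiment/"  # contributed by `path_in_repo`
for relpath, abspath in relpath_to_abspath.items():
    print(f"{abspath}  ->  {prefix}{relpath}")
```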
@@ -4054,10 +4298,7 @@ def _parse_revision_from_pr_url(pr_url: str) -> str:
4054
4298
  """
4055
4299
  re_match = re.match(_REGEX_DISCUSSION_URL, pr_url)
4056
4300
  if re_match is None:
4057
- raise RuntimeError(
4058
- "Unexpected response from the hub, expected a Pull Request URL but got:"
4059
- f" '{pr_url}'"
4060
- )
4301
+ raise RuntimeError(f"Unexpected response from the hub, expected a Pull Request URL but got: '{pr_url}'")
4061
4302
  return f"refs/pr/{re_match[1]}"
4062
4303
 
4063
4304
 
@@ -4080,6 +4321,7 @@ space_info = api.space_info
  repo_info = api.repo_info
  list_repo_files = api.list_repo_files
  list_repo_refs = api.list_repo_refs
+ list_repo_commits = api.list_repo_commits

  list_metrics = api.list_metrics

@@ -4122,3 +4364,6 @@ add_space_secret = api.add_space_secret
  delete_space_secret = api.delete_space_secret
  get_space_runtime = api.get_space_runtime
  request_space_hardware = api.request_space_hardware
+ pause_space = api.pause_space
+ restart_space = api.restart_space
+ duplicate_space = api.duplicate_space
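As with the other `HfApi` methods, the new entry points are also usable as plain functions, assuming they are re-exported from the package root as the `duplicate_space` docstring suggests (repo names below are placeholders and the calls require authentication):

```python
from huggingface_hub import duplicate_space, list_repo_commits, pause_space

commits = list_repo_commits("gpt2")
print(f"{len(commits)} commits; initial commit: {commits[-1].title}")

duplicate_space("multimodalart/dreambooth-training", to_id="my-dreambooth", private=True)
pause_space("username/my-dreambooth")
```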