huggingface-hub 0.26.4__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (61)
  1. huggingface_hub/__init__.py +49 -23
  2. huggingface_hub/_commit_scheduler.py +30 -4
  3. huggingface_hub/_local_folder.py +0 -4
  4. huggingface_hub/_login.py +38 -54
  5. huggingface_hub/_snapshot_download.py +6 -3
  6. huggingface_hub/_tensorboard_logger.py +2 -3
  7. huggingface_hub/_upload_large_folder.py +1 -1
  8. huggingface_hub/errors.py +19 -0
  9. huggingface_hub/fastai_utils.py +3 -2
  10. huggingface_hub/file_download.py +10 -12
  11. huggingface_hub/hf_api.py +102 -498
  12. huggingface_hub/hf_file_system.py +274 -35
  13. huggingface_hub/hub_mixin.py +5 -25
  14. huggingface_hub/inference/_client.py +185 -136
  15. huggingface_hub/inference/_common.py +2 -2
  16. huggingface_hub/inference/_generated/_async_client.py +186 -137
  17. huggingface_hub/inference/_generated/types/__init__.py +31 -10
  18. huggingface_hub/inference/_generated/types/audio_classification.py +3 -5
  19. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +6 -9
  20. huggingface_hub/inference/_generated/types/chat_completion.py +8 -5
  21. huggingface_hub/inference/_generated/types/depth_estimation.py +1 -1
  22. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -6
  23. huggingface_hub/inference/_generated/types/feature_extraction.py +1 -1
  24. huggingface_hub/inference/_generated/types/fill_mask.py +2 -4
  25. huggingface_hub/inference/_generated/types/image_classification.py +3 -5
  26. huggingface_hub/inference/_generated/types/image_segmentation.py +2 -4
  27. huggingface_hub/inference/_generated/types/image_to_image.py +2 -4
  28. huggingface_hub/inference/_generated/types/image_to_text.py +6 -9
  29. huggingface_hub/inference/_generated/types/object_detection.py +2 -4
  30. huggingface_hub/inference/_generated/types/question_answering.py +2 -4
  31. huggingface_hub/inference/_generated/types/sentence_similarity.py +1 -1
  32. huggingface_hub/inference/_generated/types/summarization.py +2 -4
  33. huggingface_hub/inference/_generated/types/table_question_answering.py +21 -3
  34. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -4
  35. huggingface_hub/inference/_generated/types/text_classification.py +4 -10
  36. huggingface_hub/inference/_generated/types/text_to_audio.py +7 -10
  37. huggingface_hub/inference/_generated/types/text_to_image.py +2 -4
  38. huggingface_hub/inference/_generated/types/text_to_speech.py +7 -10
  39. huggingface_hub/inference/_generated/types/token_classification.py +11 -12
  40. huggingface_hub/inference/_generated/types/translation.py +2 -4
  41. huggingface_hub/inference/_generated/types/video_classification.py +3 -4
  42. huggingface_hub/inference/_generated/types/visual_question_answering.py +2 -5
  43. huggingface_hub/inference/_generated/types/zero_shot_classification.py +8 -18
  44. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +9 -19
  45. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +7 -9
  46. huggingface_hub/keras_mixin.py +3 -2
  47. huggingface_hub/lfs.py +2 -5
  48. huggingface_hub/repocard_data.py +4 -4
  49. huggingface_hub/serialization/__init__.py +2 -0
  50. huggingface_hub/serialization/_dduf.py +387 -0
  51. huggingface_hub/serialization/_torch.py +361 -14
  52. huggingface_hub/utils/_cache_manager.py +1 -1
  53. huggingface_hub/utils/_headers.py +9 -25
  54. huggingface_hub/utils/tqdm.py +15 -0
  55. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/METADATA +8 -3
  56. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/RECORD +60 -60
  57. huggingface_hub/_multi_commits.py +0 -306
  58. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/LICENSE +0 -0
  59. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/WHEEL +0 -0
  60. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/entry_points.txt +0 -0
  61. {huggingface_hub-0.26.4.dist-info → huggingface_hub-0.27.0.dist-info}/top_level.txt +0 -0
huggingface_hub/hf_api.py CHANGED
@@ -61,19 +61,6 @@ from ._commit_api import (
61
61
  _warn_on_overwriting_operations,
62
62
  )
63
63
  from ._inference_endpoints import InferenceEndpoint, InferenceEndpointType
64
- from ._multi_commits import (
65
- MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_BAD_REQUEST_TEMPLATE,
66
- MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_NO_CHANGES_TEMPLATE,
67
- MULTI_COMMIT_PR_CLOSING_COMMENT_TEMPLATE,
68
- MULTI_COMMIT_PR_COMPLETION_COMMENT_TEMPLATE,
69
- MultiCommitException,
70
- MultiCommitStep,
71
- MultiCommitStrategy,
72
- multi_commit_create_pull_request,
73
- multi_commit_generate_comment,
74
- multi_commit_parse_pr_description,
75
- plan_multi_commits,
76
- )
77
64
  from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable
78
65
  from ._upload_large_folder import upload_large_folder_internal
79
66
  from .community import (
@@ -125,7 +112,6 @@ from .utils import (
125
112
  SafetensorsRepoMetadata,
126
113
  TensorInfo,
127
114
  build_hf_headers,
128
- experimental,
129
115
  filter_repo_objects,
130
116
  fix_hf_endpoint_in_url,
131
117
  get_session,
@@ -1126,33 +1112,6 @@ class SpaceInfo:
1126
1112
  self.__dict__.update(**kwargs)
1127
1113
 
1128
1114
 
1129
- @dataclass
1130
- class MetricInfo:
1131
- """
1132
- Contains information about a metric on the Hub.
1133
-
1134
- Attributes:
1135
- id (`str`):
1136
- ID of the metric. E.g. `"accuracy"`.
1137
- space_id (`str`):
1138
- ID of the space associated with the metric. E.g. `"Accuracy"`.
1139
- description (`str`):
1140
- Description of the metric.
1141
- """
1142
-
1143
- id: str
1144
- space_id: str
1145
- description: Optional[str]
1146
-
1147
- def __init__(self, **kwargs):
1148
- self.id = kwargs.pop("id")
1149
- self.space_id = kwargs.pop("spaceId")
1150
- self.description = kwargs.pop("description", None)
1151
- # backwards compatibility
1152
- self.spaceId = self.space_id
1153
- self.__dict__.update(**kwargs)
1154
-
1155
-
1156
1115
  @dataclass
1157
1116
  class CollectionItem:
1158
1117
  """
@@ -1563,6 +1522,36 @@ def future_compatible(fn: CallableT) -> CallableT:
1563
1522
 
1564
1523
 
1565
1524
  class HfApi:
1525
+ """
1526
+ Client to interact with the Hugging Face Hub via HTTP.
1527
+
1528
+ The client is initialized with some high-level settings used in all requests
1529
+ made to the Hub (HF endpoint, authentication, user agents...). Using the `HfApi`
1530
+ client is preferred but not mandatory as all of its public methods are exposed
1531
+ directly at the root of `huggingface_hub`.
1532
+
1533
+ Args:
1534
+ endpoint (`str`, *optional*):
1535
+ Endpoint of the Hub. Defaults to <https://huggingface.co>.
1536
+ token (Union[bool, str, None], optional):
1537
+ A valid user access token (string). Defaults to the locally saved
1538
+ token, which is the recommended method for authentication (see
1539
+ https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
1540
+ To disable authentication, pass `False`.
1541
+ library_name (`str`, *optional*):
1542
+ The name of the library that is making the HTTP request. Will be added to
1543
+ the user-agent header. Example: `"transformers"`.
1544
+ library_version (`str`, *optional*):
1545
+ The version of the library that is making the HTTP request. Will be added
1546
+ to the user-agent header. Example: `"4.24.0"`.
1547
+ user_agent (`str`, `dict`, *optional*):
1548
+ The user agent info in the form of a dictionary or a single string. It will
1549
+ be completed with information about the installed packages.
1550
+ headers (`dict`, *optional*):
1551
+ Additional headers to be sent with each request. Example: `{"X-My-Header": "value"}`.
1552
+ Headers passed here are taking precedence over the default headers.
1553
+ """
1554
+
1566
1555
  def __init__(
1567
1556
  self,
1568
1557
  endpoint: Optional[str] = None,
@@ -1572,32 +1561,6 @@ class HfApi:
1572
1561
  user_agent: Union[Dict, str, None] = None,
1573
1562
  headers: Optional[Dict[str, str]] = None,
1574
1563
  ) -> None:
1575
- """Create a HF client to interact with the Hub via HTTP.
1576
-
1577
- The client is initialized with some high-level settings used in all requests
1578
- made to the Hub (HF endpoint, authentication, user agents...). Using the `HfApi`
1579
- client is preferred but not mandatory as all of its public methods are exposed
1580
- directly at the root of `huggingface_hub`.
1581
-
1582
- Args:
1583
- token (Union[bool, str, None], optional):
1584
- A valid user access token (string). Defaults to the locally saved
1585
- token, which is the recommended method for authentication (see
1586
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
1587
- To disable authentication, pass `False`.
1588
- library_name (`str`, *optional*):
1589
- The name of the library that is making the HTTP request. Will be added to
1590
- the user-agent header. Example: `"transformers"`.
1591
- library_version (`str`, *optional*):
1592
- The version of the library that is making the HTTP request. Will be added
1593
- to the user-agent header. Example: `"4.24.0"`.
1594
- user_agent (`str`, `dict`, *optional*):
1595
- The user agent info in the form of a dictionary or a single string. It will
1596
- be completed with information about the installed packages.
1597
- headers (`dict`, *optional*):
1598
- Additional headers to be sent with each request. Example: `{"X-My-Header": "value"}`.
1599
- Headers passed here are taking precedence over the default headers.
1600
- """
1601
1564
  self.endpoint = endpoint if endpoint is not None else constants.ENDPOINT
1602
1565
  self.token = token
1603
1566
  self.library_name = library_name
@@ -1678,10 +1641,28 @@ class HfApi:
1678
1641
  ) from e
1679
1642
  return r.json()
1680
1643
 
1681
- def get_token_permission(self, token: Union[bool, str, None] = None) -> Literal["read", "write", None]:
1644
+ @_deprecate_method(
1645
+ version="1.0",
1646
+ message=(
1647
+ "Permissions are more complex than when `get_token_permission` was first introduced. "
1648
+ "OAuth and fine-grain tokens allows for more detailed permissions. "
1649
+ "If you need to know the permissions associated with a token, please use `whoami` and check the `'auth'` key."
1650
+ ),
1651
+ )
1652
+ def get_token_permission(
1653
+ self, token: Union[bool, str, None] = None
1654
+ ) -> Literal["read", "write", "fineGrained", None]:
1682
1655
  """
1683
1656
  Check if a given `token` is valid and return its permissions.
1684
1657
 
1658
+ <Tip warning={true}>
1659
+
1660
+ This method is deprecated and will be removed in version 1.0. Permissions are more complex than when
1661
+ `get_token_permission` was first introduced. OAuth and fine-grained tokens allow for more detailed permissions.
1662
+ If you need to know the permissions associated with a token, please use `whoami` and check the `'auth'` key.
1663
+
1664
+ </Tip>
1665
+
1685
1666
  For more details about tokens, please refer to https://huggingface.co/docs/hub/security-tokens#what-are-user-access-tokens.
1686
1667
 
1687
1668
  Args:
@@ -1692,12 +1673,12 @@ class HfApi:
1692
1673
  To disable authentication, pass `False`.
1693
1674
 
1694
1675
  Returns:
1695
- `Literal["read", "write", None]`: Permission granted by the token ("read" or "write"). Returns `None` if no
1696
- token passed or token is invalid.
1676
+ `Literal["read", "write", "fineGrained", None]`: Permission granted by the token ("read", "write" or "fineGrained"). Returns `None` if no
1677
+ token passed, if token is invalid or if role is not returned by the server. This typically happens when the token is an OAuth token.
1697
1678
  """
1698
1679
  try:
1699
1680
  return self.whoami(token=token)["auth"]["accessToken"]["role"]
1700
- except (LocalTokenNotFoundError, HTTPError):
1681
+ except (LocalTokenNotFoundError, HTTPError, KeyError):
1701
1682
  return None
1702
1683
 
1703
1684
  def get_model_tags(self) -> Dict:
@@ -1790,8 +1771,8 @@ class HfApi:
1790
1771
  A tuple of two ints or floats representing a minimum and maximum
1791
1772
  carbon footprint to filter the resulting models with in grams.
1792
1773
  sort (`Literal["last_modified"]` or `str`, *optional*):
1793
- The key with which to sort the resulting models. Possible values
1794
- are the properties of the [`huggingface_hub.hf_api.ModelInfo`] class.
1774
+ The key with which to sort the resulting models. Possible values are "last_modified", "trending_score",
1775
+ "created_at", "downloads" and "likes".
1795
1776
  direction (`Literal[-1]` or `int`, *optional*):
1796
1777
  Direction in which to sort. The value `-1` sorts by descending
1797
1778
  order while all other values sort by ascending order.
@@ -1903,7 +1884,15 @@ class HfApi:
1903
1884
  if len(search_list) > 0:
1904
1885
  params["search"] = search_list
1905
1886
  if sort is not None:
1906
- params["sort"] = "lastModified" if sort == "last_modified" else sort
1887
+ params["sort"] = (
1888
+ "lastModified"
1889
+ if sort == "last_modified"
1890
+ else "trendingScore"
1891
+ if sort == "trending_score"
1892
+ else "createdAt"
1893
+ if sort == "created_at"
1894
+ else sort
1895
+ )
1907
1896
  if direction is not None:
1908
1897
  params["direction"] = direction
1909
1898
  if limit is not None:
@@ -2002,8 +1991,8 @@ class HfApi:
2002
1991
  search (`str`, *optional*):
2003
1992
  A string that will be contained in the returned datasets.
2004
1993
  sort (`Literal["last_modified"]` or `str`, *optional*):
2005
- The key with which to sort the resulting datasets. Possible
2006
- values are the properties of the [`huggingface_hub.hf_api.DatasetInfo`] class.
1994
+ The key with which to sort the resulting datasets. Possible values are "last_modified", "trending_score",
1995
+ "created_at", "downloads" and "likes".
2007
1996
  direction (`Literal[-1]` or `int`, *optional*):
2008
1997
  Direction in which to sort. The value `-1` sorts by descending
2009
1998
  order while all other values sort by ascending order.
@@ -2113,7 +2102,15 @@ class HfApi:
2113
2102
  if len(search_list) > 0:
2114
2103
  params["search"] = search_list
2115
2104
  if sort is not None:
2116
- params["sort"] = "lastModified" if sort == "last_modified" else sort
2105
+ params["sort"] = (
2106
+ "lastModified"
2107
+ if sort == "last_modified"
2108
+ else "trendingScore"
2109
+ if sort == "trending_score"
2110
+ else "createdAt"
2111
+ if sort == "created_at"
2112
+ else sort
2113
+ )
2117
2114
  if direction is not None:
2118
2115
  params["direction"] = direction
2119
2116
  if limit is not None:
@@ -2133,19 +2130,6 @@ class HfApi:
2133
2130
  item["siblings"] = None
2134
2131
  yield DatasetInfo(**item)
2135
2132
 
2136
- def list_metrics(self) -> List[MetricInfo]:
2137
- """
2138
- Get the public list of all the metrics on huggingface.co
2139
-
2140
- Returns:
2141
- `List[MetricInfo]`: a list of [`MetricInfo`] objects which.
2142
- """
2143
- path = f"{self.endpoint}/api/metrics"
2144
- r = get_session().get(path)
2145
- hf_raise_for_status(r)
2146
- d = r.json()
2147
- return [MetricInfo(**x) for x in d]
2148
-
2149
2133
  @validate_hf_hub_args
2150
2134
  def list_spaces(
2151
2135
  self,
@@ -2185,8 +2169,8 @@ class HfApi:
2185
2169
  linked (`bool`, *optional*):
2186
2170
  Whether to return Spaces that make use of either a model or a dataset.
2187
2171
  sort (`Literal["last_modified"]` or `str`, *optional*):
2188
- The key with which to sort the resulting Spaces. Possible
2189
- values are the properties of the [`huggingface_hub.hf_api.SpaceInfo`]` class.
2172
+ The key with which to sort the resulting Spaces. Possible values are "last_modified", "trending_score",
2173
+ "created_at" and "likes".
2190
2174
  direction (`Literal[-1]` or `int`, *optional*):
2191
2175
  Direction in which to sort. The value `-1` sorts by descending
2192
2176
  order while all other values sort by ascending order.
@@ -2222,7 +2206,15 @@ class HfApi:
2222
2206
  if search is not None:
2223
2207
  params["search"] = search
2224
2208
  if sort is not None:
2225
- params["sort"] = "lastModified" if sort == "last_modified" else sort
2209
+ params["sort"] = (
2210
+ "lastModified"
2211
+ if sort == "last_modified"
2212
+ else "trendingScore"
2213
+ if sort == "trending_score"
2214
+ else "createdAt"
2215
+ if sort == "created_at"
2216
+ else sort
2217
+ )
2226
2218
  if direction is not None:
2227
2219
  params["direction"] = direction
2228
2220
  if limit is not None:
@@ -3407,7 +3399,7 @@ class HfApi:
3407
3399
  repo_id: str,
3408
3400
  *,
3409
3401
  token: Union[str, bool, None] = None,
3410
- private: bool = False,
3402
+ private: Optional[bool] = None,
3411
3403
  repo_type: Optional[str] = None,
3412
3404
  exist_ok: bool = False,
3413
3405
  resource_group_id: Optional[str] = None,
@@ -3429,8 +3421,8 @@ class HfApi:
3429
3421
  token, which is the recommended method for authentication (see
3430
3422
  https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
3431
3423
  To disable authentication, pass `False`.
3432
- private (`bool`, *optional*, defaults to `False`):
3433
- Whether the model repo should be private.
3424
+ private (`bool`, *optional*):
3425
+ Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
3434
3426
  repo_type (`str`, *optional*):
3435
3427
  Set to `"dataset"` or `"space"` if uploading to a dataset or
3436
3428
  space, `None` or `"model"` if uploading to a model. Default is
@@ -3471,7 +3463,9 @@ class HfApi:
3471
3463
  if repo_type not in constants.REPO_TYPES:
3472
3464
  raise ValueError("Invalid repo type")
3473
3465
 
3474
- json: Dict[str, Any] = {"name": name, "organization": organization, "private": private}
3466
+ json: Dict[str, Any] = {"name": name, "organization": organization}
3467
+ if private is not None:
3468
+ json["private"] = private
3475
3469
  if repo_type is not None:
3476
3470
  json["type"] = repo_type
3477
3471
  if repo_type == "space":
@@ -4115,312 +4109,6 @@ class HfApi:
4115
4109
  pr_url=commit_data["pullRequestUrl"] if create_pr else None,
4116
4110
  )
4117
4111
 
4118
- @experimental
4119
- @validate_hf_hub_args
4120
- @_deprecate_method(
4121
- version="0.27", message="This is an experimental feature. Please use `upload_large_folder` instead."
4122
- )
4123
- def create_commits_on_pr(
4124
- self,
4125
- *,
4126
- repo_id: str,
4127
- addition_commits: List[List[CommitOperationAdd]],
4128
- deletion_commits: List[List[CommitOperationDelete]],
4129
- commit_message: str,
4130
- commit_description: Optional[str] = None,
4131
- token: Union[str, bool, None] = None,
4132
- repo_type: Optional[str] = None,
4133
- merge_pr: bool = True,
4134
- num_threads: int = 5, # TODO: use to multithread uploads
4135
- verbose: bool = False,
4136
- ) -> str:
4137
- """Push changes to the Hub in multiple commits.
4138
-
4139
- Commits are pushed to a draft PR branch. If the upload fails or gets interrupted, it can be resumed. Progress
4140
- is tracked in the PR description. At the end of the process, the PR is set as open and the title is updated to
4141
- match the initial commit message. If `merge_pr=True` is passed, the PR is merged automatically.
4142
-
4143
- All deletion commits are pushed first, followed by the addition commits. The order of the commits is not
4144
- guaranteed as we might implement parallel commits in the future. Be sure that your are not updating several
4145
- times the same file.
4146
-
4147
- <Tip warning={true}>
4148
-
4149
- `create_commits_on_pr` is experimental. Its API and behavior is subject to change in the future without prior notice.
4150
-
4151
- </Tip>
4152
-
4153
- <Tip warning={true}>
4154
-
4155
- `create_commits_on_pr` assumes that the repo already exists on the Hub. If you get a Client error 404, please
4156
- make sure you are authenticated and that `repo_id` and `repo_type` are set correctly. If repo does not exist,
4157
- create it first using [`~hf_api.create_repo`].
4158
-
4159
- </Tip>
4160
-
4161
- Args:
4162
- repo_id (`str`):
4163
- The repository in which the commits will be pushed. Example: `"username/my-cool-model"`.
4164
-
4165
- addition_commits (`List` of `List` of [`~hf_api.CommitOperationAdd`]):
4166
- A list containing lists of [`~hf_api.CommitOperationAdd`]. Each sublist will result in a commit on the
4167
- PR.
4168
-
4169
- deletion_commits
4170
- A list containing lists of [`~hf_api.CommitOperationDelete`]. Each sublist will result in a commit on
4171
- the PR. Deletion commits are pushed before addition commits.
4172
-
4173
- commit_message (`str`):
4174
- The summary (first line) of the commit that will be created. Will also be the title of the PR.
4175
-
4176
- commit_description (`str`, *optional*):
4177
- The description of the commit that will be created. The description will be added to the PR.
4178
-
4179
- token (Union[bool, str, None], optional):
4180
- A valid user access token (string). Defaults to the locally saved
4181
- token, which is the recommended method for authentication (see
4182
- https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
4183
- To disable authentication, pass `False`.
4184
-
4185
- repo_type (`str`, *optional*):
4186
- Set to `"dataset"` or `"space"` if uploading to a dataset or space, `None` or `"model"` if uploading to
4187
- a model. Default is `None`.
4188
-
4189
- merge_pr (`bool`):
4190
- If set to `True`, the Pull Request is merged at the end of the process. Defaults to `True`.
4191
-
4192
- num_threads (`int`, *optional*):
4193
- Number of concurrent threads for uploading files. Defaults to 5.
4194
-
4195
- verbose (`bool`):
4196
- If set to `True`, process will run on verbose mode i.e. print information about the ongoing tasks.
4197
- Defaults to `False`.
4198
-
4199
- Returns:
4200
- `str`: URL to the created PR.
4201
-
4202
- Example:
4203
- ```python
4204
- >>> from huggingface_hub import HfApi, plan_multi_commits
4205
- >>> addition_commits, deletion_commits = plan_multi_commits(
4206
- ... operations=[
4207
- ... CommitOperationAdd(...),
4208
- ... CommitOperationAdd(...),
4209
- ... CommitOperationDelete(...),
4210
- ... CommitOperationDelete(...),
4211
- ... CommitOperationAdd(...),
4212
- ... ],
4213
- ... )
4214
- >>> HfApi().create_commits_on_pr(
4215
- ... repo_id="my-cool-model",
4216
- ... addition_commits=addition_commits,
4217
- ... deletion_commits=deletion_commits,
4218
- ... (...)
4219
- ... verbose=True,
4220
- ... )
4221
- ```
4222
-
4223
- Raises:
4224
- [`MultiCommitException`]:
4225
- If an unexpected issue occur in the process: empty commits, unexpected commits in a PR, unexpected PR
4226
- description, etc.
4227
- """
4228
- logger = logging.get_logger(__name__ + ".create_commits_on_pr")
4229
- if verbose:
4230
- logger.setLevel("INFO")
4231
-
4232
- # 1. Get strategy ID
4233
- logger.info(
4234
- f"Will create {len(deletion_commits)} deletion commit(s) and {len(addition_commits)} addition commit(s),"
4235
- f" totalling {sum(len(ops) for ops in addition_commits+deletion_commits)} atomic operations."
4236
- )
4237
- strategy = MultiCommitStrategy(
4238
- addition_commits=[MultiCommitStep(operations=operations) for operations in addition_commits], # type: ignore
4239
- deletion_commits=[MultiCommitStep(operations=operations) for operations in deletion_commits], # type: ignore
4240
- )
4241
- logger.info(f"Multi-commits strategy with ID {strategy.id}.")
4242
-
4243
- # 2. Get or create a PR with this strategy ID
4244
- for discussion in self.get_repo_discussions(repo_id=repo_id, repo_type=repo_type, token=token):
4245
- # search for a draft PR with strategy ID
4246
- if discussion.is_pull_request and discussion.status == "draft" and strategy.id in discussion.title:
4247
- pr = self.get_discussion_details(
4248
- repo_id=repo_id, discussion_num=discussion.num, repo_type=repo_type, token=token
4249
- )
4250
- logger.info(f"PR already exists: {pr.url}. Will resume process where it stopped.")
4251
- break
4252
- else:
4253
- # did not find a PR matching the strategy ID
4254
- pr = multi_commit_create_pull_request(
4255
- self,
4256
- repo_id=repo_id,
4257
- commit_message=commit_message,
4258
- commit_description=commit_description,
4259
- strategy=strategy,
4260
- token=token,
4261
- repo_type=repo_type,
4262
- )
4263
- logger.info(f"New PR created: {pr.url}")
4264
-
4265
- # 3. Parse PR description to check consistency with strategy (e.g. same commits are scheduled)
4266
- for event in pr.events:
4267
- if isinstance(event, DiscussionComment):
4268
- pr_comment = event
4269
- break
4270
- else:
4271
- raise MultiCommitException(f"PR #{pr.num} must have at least 1 comment")
4272
-
4273
- description_commits = multi_commit_parse_pr_description(pr_comment.content)
4274
- if len(description_commits) != len(strategy.all_steps):
4275
- raise MultiCommitException(
4276
- f"Corrupted multi-commit PR #{pr.num}: got {len(description_commits)} steps in"
4277
- f" description but {len(strategy.all_steps)} in strategy."
4278
- )
4279
- for step_id in strategy.all_steps:
4280
- if step_id not in description_commits:
4281
- raise MultiCommitException(
4282
- f"Corrupted multi-commit PR #{pr.num}: expected step {step_id} but didn't find"
4283
- f" it (have {', '.join(description_commits)})."
4284
- )
4285
-
4286
- # 4. Retrieve commit history (and check consistency)
4287
- commits_on_main_branch = {
4288
- commit.commit_id
4289
- for commit in self.list_repo_commits(
4290
- repo_id=repo_id, repo_type=repo_type, token=token, revision=constants.DEFAULT_REVISION
4291
- )
4292
- }
4293
- pr_commits = [
4294
- commit
4295
- for commit in self.list_repo_commits(
4296
- repo_id=repo_id, repo_type=repo_type, token=token, revision=pr.git_reference
4297
- )
4298
- if commit.commit_id not in commits_on_main_branch
4299
- ]
4300
- if len(pr_commits) > 0:
4301
- logger.info(f"Found {len(pr_commits)} existing commits on the PR.")
4302
-
4303
- # At this point `pr_commits` is a list of commits pushed to the PR. We expect all of these commits (if any) to have
4304
- # a step_id as title. We raise exception if an unexpected commit has been pushed.
4305
- if len(pr_commits) > len(strategy.all_steps):
4306
- raise MultiCommitException(
4307
- f"Corrupted multi-commit PR #{pr.num}: scheduled {len(strategy.all_steps)} steps but"
4308
- f" {len(pr_commits)} commits have already been pushed to the PR."
4309
- )
4310
-
4311
- # Check which steps are already completed
4312
- remaining_additions = {step.id: step for step in strategy.addition_commits}
4313
- remaining_deletions = {step.id: step for step in strategy.deletion_commits}
4314
- for commit in pr_commits:
4315
- if commit.title in remaining_additions:
4316
- step = remaining_additions.pop(commit.title)
4317
- step.completed = True
4318
- elif commit.title in remaining_deletions:
4319
- step = remaining_deletions.pop(commit.title)
4320
- step.completed = True
4321
-
4322
- if len(remaining_deletions) > 0 and len(remaining_additions) < len(strategy.addition_commits):
4323
- raise MultiCommitException(
4324
- f"Corrupted multi-commit PR #{pr.num}: some addition commits have already been pushed to the PR but"
4325
- " deletion commits are not all completed yet."
4326
- )
4327
- nb_remaining = len(remaining_deletions) + len(remaining_additions)
4328
- if len(pr_commits) > 0:
4329
- logger.info(
4330
- f"{nb_remaining} commits remaining ({len(remaining_deletions)} deletion commits and"
4331
- f" {len(remaining_additions)} addition commits)"
4332
- )
4333
-
4334
- # 5. Push remaining commits to the PR + update description
4335
- # TODO: multi-thread this
4336
- for step in list(remaining_deletions.values()) + list(remaining_additions.values()):
4337
- # Push new commit
4338
- self.create_commit(
4339
- repo_id=repo_id,
4340
- repo_type=repo_type,
4341
- token=token,
4342
- commit_message=step.id,
4343
- revision=pr.git_reference,
4344
- num_threads=num_threads,
4345
- operations=step.operations,
4346
- create_pr=False,
4347
- )
4348
- step.completed = True
4349
- nb_remaining -= 1
4350
- logger.info(f" step {step.id} completed (still {nb_remaining} to go).")
4351
-
4352
- # Update PR description
4353
- self.edit_discussion_comment(
4354
- repo_id=repo_id,
4355
- repo_type=repo_type,
4356
- token=token,
4357
- discussion_num=pr.num,
4358
- comment_id=pr_comment.id,
4359
- new_content=multi_commit_generate_comment(
4360
- commit_message=commit_message, commit_description=commit_description, strategy=strategy
4361
- ),
4362
- )
4363
- logger.info("All commits have been pushed.")
4364
-
4365
- # 6. Update PR (and merge)
4366
- self.rename_discussion(
4367
- repo_id=repo_id,
4368
- repo_type=repo_type,
4369
- token=token,
4370
- discussion_num=pr.num,
4371
- new_title=commit_message,
4372
- )
4373
- self.change_discussion_status(
4374
- repo_id=repo_id,
4375
- repo_type=repo_type,
4376
- token=token,
4377
- discussion_num=pr.num,
4378
- new_status="open",
4379
- comment=MULTI_COMMIT_PR_COMPLETION_COMMENT_TEMPLATE,
4380
- )
4381
- logger.info("PR is now open for reviews.")
4382
-
4383
- if merge_pr: # User don't want a PR => merge it
4384
- try:
4385
- self.merge_pull_request(
4386
- repo_id=repo_id,
4387
- repo_type=repo_type,
4388
- token=token,
4389
- discussion_num=pr.num,
4390
- comment=MULTI_COMMIT_PR_CLOSING_COMMENT_TEMPLATE,
4391
- )
4392
- logger.info("PR has been automatically merged (`merge_pr=True` was passed).")
4393
- except BadRequestError as error:
4394
- if error.server_message is not None and "no associated changes" in error.server_message:
4395
- # PR cannot be merged as no changes are associated. We close the PR without merging with a comment to
4396
- # explain.
4397
- self.change_discussion_status(
4398
- repo_id=repo_id,
4399
- repo_type=repo_type,
4400
- token=token,
4401
- discussion_num=pr.num,
4402
- comment=MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_NO_CHANGES_TEMPLATE,
4403
- new_status="closed",
4404
- )
4405
- logger.warning("Couldn't merge the PR: no associated changes.")
4406
- else:
4407
- # PR cannot be merged for another reason (conflicting files for example). We comment the PR to explain
4408
- # and re-raise the exception.
4409
- self.comment_discussion(
4410
- repo_id=repo_id,
4411
- repo_type=repo_type,
4412
- token=token,
4413
- discussion_num=pr.num,
4414
- comment=MULTI_COMMIT_PR_CLOSE_COMMENT_FAILURE_BAD_REQUEST_TEMPLATE.format(
4415
- error_message=error.server_message
4416
- ),
4417
- )
4418
- raise MultiCommitException(
4419
- f"Couldn't merge Pull Request in multi-commit: {error.server_message}"
4420
- ) from error
4421
-
4422
- return pr.url
4423
-
4424
4112
  def preupload_lfs_files(
4425
4113
  self,
4426
4114
  repo_id: str,
@@ -4794,8 +4482,6 @@ class HfApi:
4794
4482
  allow_patterns: Optional[Union[List[str], str]] = None,
4795
4483
  ignore_patterns: Optional[Union[List[str], str]] = None,
4796
4484
  delete_patterns: Optional[Union[List[str], str]] = None,
4797
- multi_commits: Literal[False] = ...,
4798
- multi_commits_verbose: bool = False,
4799
4485
  run_as_future: Literal[False] = ...,
4800
4486
  ) -> CommitInfo: ...
4801
4487
 
@@ -4816,57 +4502,9 @@ class HfApi:
4816
4502
  allow_patterns: Optional[Union[List[str], str]] = None,
4817
4503
  ignore_patterns: Optional[Union[List[str], str]] = None,
4818
4504
  delete_patterns: Optional[Union[List[str], str]] = None,
4819
- multi_commits: Literal[True] = ...,
4820
- multi_commits_verbose: bool = False,
4821
- run_as_future: Literal[False] = ...,
4822
- ) -> str: # Only the PR url in multi-commits mode
4823
- ...
4824
-
4825
- @overload
4826
- def upload_folder( # type: ignore
4827
- self,
4828
- *,
4829
- repo_id: str,
4830
- folder_path: Union[str, Path],
4831
- path_in_repo: Optional[str] = None,
4832
- commit_message: Optional[str] = None,
4833
- commit_description: Optional[str] = None,
4834
- token: Union[str, bool, None] = None,
4835
- repo_type: Optional[str] = None,
4836
- revision: Optional[str] = None,
4837
- create_pr: Optional[bool] = None,
4838
- parent_commit: Optional[str] = None,
4839
- allow_patterns: Optional[Union[List[str], str]] = None,
4840
- ignore_patterns: Optional[Union[List[str], str]] = None,
4841
- delete_patterns: Optional[Union[List[str], str]] = None,
4842
- multi_commits: Literal[False] = ...,
4843
- multi_commits_verbose: bool = False,
4844
4505
  run_as_future: Literal[True] = ...,
4845
4506
  ) -> Future[CommitInfo]: ...
4846
4507
 
4847
- @overload
4848
- def upload_folder(
4849
- self,
4850
- *,
4851
- repo_id: str,
4852
- folder_path: Union[str, Path],
4853
- path_in_repo: Optional[str] = None,
4854
- commit_message: Optional[str] = None,
4855
- commit_description: Optional[str] = None,
4856
- token: Union[str, bool, None] = None,
4857
- repo_type: Optional[str] = None,
4858
- revision: Optional[str] = None,
4859
- create_pr: Optional[bool] = None,
4860
- parent_commit: Optional[str] = None,
4861
- allow_patterns: Optional[Union[List[str], str]] = None,
4862
- ignore_patterns: Optional[Union[List[str], str]] = None,
4863
- delete_patterns: Optional[Union[List[str], str]] = None,
4864
- multi_commits: Literal[True] = ...,
4865
- multi_commits_verbose: bool = False,
4866
- run_as_future: Literal[True] = ...,
4867
- ) -> Future[str]: # Only the PR url in multi-commits mode
4868
- ...
4869
-
4870
4508
  @validate_hf_hub_args
4871
4509
  @future_compatible
4872
4510
  def upload_folder(
@@ -4885,10 +4523,8 @@ class HfApi:
4885
4523
  allow_patterns: Optional[Union[List[str], str]] = None,
4886
4524
  ignore_patterns: Optional[Union[List[str], str]] = None,
4887
4525
  delete_patterns: Optional[Union[List[str], str]] = None,
4888
- multi_commits: bool = False,
4889
- multi_commits_verbose: bool = False,
4890
4526
  run_as_future: bool = False,
4891
- ) -> Union[CommitInfo, str, Future[CommitInfo], Future[str]]:
4527
+ ) -> Union[CommitInfo, Future[CommitInfo]]:
4892
4528
  """
4893
4529
  Upload a local folder to the given repo. The upload is done through a HTTP requests, and doesn't require git or
4894
4530
  git-lfs to be installed.
@@ -4941,8 +4577,7 @@ class HfApi:
4941
4577
  Whether or not to create a Pull Request with that commit. Defaults to `False`. If `revision` is not
4942
4578
  set, PR is opened against the `"main"` branch. If `revision` is set and is a branch, PR is opened
4943
4579
  against this branch. If `revision` is set and is not a branch name (example: a commit oid), an
4944
- `RevisionNotFoundError` is returned by the server. If both `multi_commits` and `create_pr` are True,
4945
- the PR created in the multi-commit process is kept opened.
4580
+ `RevisionNotFoundError` is returned by the server.
4946
4581
  parent_commit (`str`, *optional*):
4947
4582
  The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
4948
4583
  If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
@@ -4957,12 +4592,6 @@ class HfApi:
4957
4592
  If provided, remote files matching any of the patterns will be deleted from the repo while committing
4958
4593
  new files. This is useful if you don't know which files have already been uploaded.
4959
4594
  Note: to avoid discrepancies the `.gitattributes` file is not deleted even if it matches the pattern.
4960
- multi_commits (`bool`):
4961
- Deprecated. For large uploads, use `upload_large_folder` instead.
4962
- If True, changes are pushed to a PR using a multi-commit process. Defaults to `False`.
4963
- multi_commits_verbose (`bool`):
4964
- Deprecated. For large uploads, use `upload_large_folder` instead.
4965
- If True and `multi_commits` is used, more information will be displayed to the user.
4966
4595
  run_as_future (`bool`, *optional*):
4967
4596
  Whether or not to run this method in the background. Background jobs are run sequentially without
4968
4597
  blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects)
@@ -4973,9 +4602,6 @@ class HfApi:
4973
4602
  Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit
4974
4603
  url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will
4975
4604
  contain the result when executed.
4976
- [`str`] or `Future`:
4977
- If `multi_commits=True`, returns the url of the PR created to push the changes. If `run_as_future=True`
4978
- is passed, returns a Future object which will contain the result when executed.
4979
4605
 
4980
4606
  <Tip>
4981
4607
 
@@ -4996,9 +4622,9 @@ class HfApi:
4996
4622
 
4997
4623
  </Tip>
4998
4624
 
4999
- <Tip warning={true}>
4625
+ <Tip>
5000
4626
 
5001
- `multi_commits` is experimental. Its API and behavior is subject to change in the future without prior notice.
4627
+ When dealing with a large folder (thousands of files or hundreds of GB), we recommend using [`~hf_api.upload_large_folder`] instead.
5002
4628
 
5003
4629
  </Tip>
5004
4630
 
@@ -5044,10 +4670,6 @@ class HfApi:
5044
4670
  if repo_type not in constants.REPO_TYPES:
5045
4671
  raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}")
5046
4672
 
5047
- if multi_commits:
5048
- if revision is not None and revision != constants.DEFAULT_REVISION:
5049
- raise ValueError("Cannot use `multi_commit` to commit changes other than the main branch.")
5050
-
5051
4673
  # By default, upload folder to the root directory in repo.
5052
4674
  if path_in_repo is None:
5053
4675
  path_in_repo = ""
@@ -5085,22 +4707,6 @@ class HfApi:
5085
4707
  commit_operations = delete_operations + add_operations
5086
4708
 
5087
4709
  commit_message = commit_message or "Upload folder using huggingface_hub"
5088
- if multi_commits:
5089
- addition_commits, deletion_commits = plan_multi_commits(operations=commit_operations)
5090
- pr_url = self.create_commits_on_pr(
5091
- repo_id=repo_id,
5092
- repo_type=repo_type,
5093
- addition_commits=addition_commits,
5094
- deletion_commits=deletion_commits,
5095
- commit_message=commit_message,
5096
- commit_description=commit_description,
5097
- token=token,
5098
- merge_pr=not create_pr,
5099
- verbose=multi_commits_verbose,
5100
- )
5101
- # Defining a CommitInfo object is not really relevant in this case
5102
- # Let's return early with pr_url only (as string).
5103
- return pr_url
5104
4710
 
5105
4711
  commit_info = self.create_commit(
5106
4712
  repo_type=repo_type,
@@ -5373,7 +4979,7 @@ class HfApi:
5373
4979
  *,
5374
4980
  repo_type: str, # Repo type is required!
5375
4981
  revision: Optional[str] = None,
5376
- private: bool = False,
4982
+ private: Optional[bool] = None,
5377
4983
  allow_patterns: Optional[Union[List[str], str]] = None,
5378
4984
  ignore_patterns: Optional[Union[List[str], str]] = None,
5379
4985
  num_workers: Optional[int] = None,
@@ -5401,7 +5007,8 @@ class HfApi:
5401
5007
  revision (`str`, `optional`):
5402
5008
  The branch to commit to. If not provided, the `main` branch will be used.
5403
5009
  private (`bool`, `optional`):
5404
- Whether the repository should be private. Defaults to False.
5010
+ Whether the repository should be private.
5011
+ If `None` (default), the repo will be public unless the organization's default is private.
5405
5012
  allow_patterns (`List[str]` or `str`, *optional*):
5406
5013
  If provided, only files matching at least one pattern are uploaded.
5407
5014
  ignore_patterns (`List[str]` or `str`, *optional*):
@@ -8622,7 +8229,7 @@ class HfApi:
8622
8229
  Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`.
8623
8230
  item_object_id (`str`):
8624
8231
  ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id).
8625
- It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0]._id`.
8232
+ It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`.
8626
8233
  missing_ok (`bool`, *optional*):
8627
8234
  If `True`, do not raise an error if item doesn't exists.
8628
8235
  token (Union[bool, str, None], optional):
@@ -9469,7 +9076,6 @@ class HfApi:
9469
9076
  def _build_hf_headers(
9470
9077
  self,
9471
9078
  token: Union[bool, str, None] = None,
9472
- is_write_action: bool = False,
9473
9079
  library_name: Optional[str] = None,
9474
9080
  library_version: Optional[str] = None,
9475
9081
  user_agent: Union[Dict, str, None] = None,
@@ -9483,7 +9089,6 @@ class HfApi:
9483
9089
  token = self.token
9484
9090
  return build_hf_headers(
9485
9091
  token=token,
9486
- is_write_action=is_write_action,
9487
9092
  library_name=library_name or self.library_name,
9488
9093
  library_version=library_version or self.library_version,
9489
9094
  user_agent=user_agent or self.user_agent,
@@ -9570,12 +9175,13 @@ class HfApi:
9570
9175
  # It's better to fail early than to fail after all the files have been hashed.
9571
9176
  if "README.md" in filtered_repo_objects:
9572
9177
  self._validate_yaml(
9573
- content=relpath_to_abspath["README.md"].read_text(),
9178
+ content=relpath_to_abspath["README.md"].read_text(encoding="utf8"),
9574
9179
  repo_type=repo_type,
9575
9180
  token=token,
9576
9181
  )
9577
9182
  if len(filtered_repo_objects) > 30:
9578
- logger.info(
9183
+ log = logger.warning if len(filtered_repo_objects) > 200 else logger.info
9184
+ log(
9579
9185
  "It seems you are trying to upload a large folder at once. This might take some time and then fail if "
9580
9186
  "the folder is too large. For such cases, it is recommended to upload in smaller batches or to use "
9581
9187
  "`HfApi().upload_large_folder(...)`/`huggingface-cli upload-large-folder` instead. For more details, "
@@ -9918,7 +9524,6 @@ list_repo_refs = api.list_repo_refs
9918
9524
  list_repo_commits = api.list_repo_commits
9919
9525
  list_repo_tree = api.list_repo_tree
9920
9526
  get_paths_info = api.get_paths_info
9921
- list_metrics = api.list_metrics
9922
9527
 
9923
9528
  get_model_tags = api.get_model_tags
9924
9529
  get_dataset_tags = api.get_dataset_tags
@@ -9935,7 +9540,6 @@ upload_folder = api.upload_folder
9935
9540
  delete_file = api.delete_file
9936
9541
  delete_folder = api.delete_folder
9937
9542
  delete_files = api.delete_files
9938
- create_commits_on_pr = api.create_commits_on_pr
9939
9543
  upload_large_folder = api.upload_large_folder
9940
9544
  preupload_lfs_files = api.preupload_lfs_files
9941
9545
  create_branch = api.create_branch