huggingface-hub 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)
  1. huggingface_hub/__init__.py +19 -1
  2. huggingface_hub/_commit_api.py +49 -20
  3. huggingface_hub/_inference_endpoints.py +10 -0
  4. huggingface_hub/_login.py +2 -2
  5. huggingface_hub/commands/download.py +1 -1
  6. huggingface_hub/file_download.py +57 -21
  7. huggingface_hub/hf_api.py +269 -54
  8. huggingface_hub/hf_file_system.py +131 -8
  9. huggingface_hub/hub_mixin.py +204 -42
  10. huggingface_hub/inference/_client.py +56 -9
  11. huggingface_hub/inference/_common.py +4 -3
  12. huggingface_hub/inference/_generated/_async_client.py +57 -9
  13. huggingface_hub/inference/_text_generation.py +5 -0
  14. huggingface_hub/inference/_types.py +17 -0
  15. huggingface_hub/lfs.py +6 -3
  16. huggingface_hub/repocard.py +5 -3
  17. huggingface_hub/repocard_data.py +11 -3
  18. huggingface_hub/serialization/__init__.py +19 -0
  19. huggingface_hub/serialization/_base.py +168 -0
  20. huggingface_hub/serialization/_numpy.py +67 -0
  21. huggingface_hub/serialization/_tensorflow.py +93 -0
  22. huggingface_hub/serialization/_torch.py +199 -0
  23. huggingface_hub/templates/datasetcard_template.md +1 -1
  24. huggingface_hub/templates/modelcard_template.md +1 -4
  25. huggingface_hub/utils/__init__.py +14 -10
  26. huggingface_hub/utils/_datetime.py +4 -11
  27. huggingface_hub/utils/_errors.py +29 -0
  28. huggingface_hub/utils/_runtime.py +21 -15
  29. huggingface_hub/utils/endpoint_helpers.py +27 -1
  30. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/METADATA +7 -3
  31. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/RECORD +35 -30
  32. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/LICENSE +0 -0
  33. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/WHEEL +0 -0
  34. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/entry_points.txt +0 -0
  35. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/top_level.txt +0 -0
huggingface_hub/hf_api.py CHANGED
@@ -20,7 +20,7 @@ import re
  import struct
  import warnings
  from concurrent.futures import Future, ThreadPoolExecutor
- from dataclasses import dataclass, field
+ from dataclasses import asdict, dataclass, field
  from datetime import datetime
  from functools import wraps
  from itertools import islice
@@ -36,12 +36,11 @@ from typing import (
      Literal,
      Optional,
      Tuple,
-     TypedDict,
      TypeVar,
      Union,
      overload,
  )
- from urllib.parse import quote, urlencode
+ from urllib.parse import quote

  import requests
  from requests.exceptions import HTTPError
@@ -53,7 +52,7 @@ from ._commit_api import (
      CommitOperationAdd,
      CommitOperationCopy,
      CommitOperationDelete,
-     _fetch_lfs_files_to_copy,
+     _fetch_files_to_copy,
      _fetch_upload_modes,
      _prepare_commit_payload,
      _upload_lfs_files,
@@ -130,7 +129,7 @@ from .utils import ( # noqa: F401 # imported for backward compatibility
      validate_hf_hub_args,
  )
  from .utils import tqdm as hf_tqdm
- from .utils._deprecation import _deprecate_method
+ from .utils._deprecation import _deprecate_arguments, _deprecate_method
  from .utils._typing import CallableT
  from .utils.endpoint_helpers import (
      DatasetFilter,
@@ -238,36 +237,56 @@ def repo_type_and_id_from_hf_id(hf_id: str, hub_url: Optional[str] = None) -> Tu
      return repo_type, namespace, repo_id


- class LastCommitInfo(TypedDict, total=False):
+ @dataclass
+ class LastCommitInfo(dict):
      oid: str
      title: str
      date: datetime

+     def __post_init__(self):  # hack to make LastCommitInfo backward compatible
+         self.update(asdict(self))
+

- class BlobLfsInfo(TypedDict, total=False):
+ @dataclass
+ class BlobLfsInfo(dict):
      size: int
      sha256: str
      pointer_size: int

+     def __post_init__(self):  # hack to make BlobLfsInfo backward compatible
+         self.update(asdict(self))

- class BlobSecurityInfo(TypedDict, total=False):
+
+ @dataclass
+ class BlobSecurityInfo(dict):
      safe: bool
      av_scan: Optional[Dict]
      pickle_import_scan: Optional[Dict]

+     def __post_init__(self):  # hack to make BlogSecurityInfo backward compatible
+         self.update(asdict(self))
+

- class TransformersInfo(TypedDict, total=False):
+ @dataclass
+ class TransformersInfo(dict):
      auto_model: str
-     custom_class: Optional[str]
+     custom_class: Optional[str] = None
      # possible `pipeline_tag` values: https://github.com/huggingface/huggingface.js/blob/3ee32554b8620644a6287e786b2a83bf5caf559c/packages/tasks/src/pipelines.ts#L72
-     pipeline_tag: Optional[str]
-     processor: Optional[str]
+     pipeline_tag: Optional[str] = None
+     processor: Optional[str] = None
+
+     def __post_init__(self):  # hack to make TransformersInfo backward compatible
+         self.update(asdict(self))


- class SafeTensorsInfo(TypedDict, total=False):
+ @dataclass
+ class SafeTensorsInfo(dict):
      parameters: List[Dict[str, int]]
      total: int

+     def __post_init__(self):  # hack to make SafeTensorsInfo backward compatible
+         self.update(asdict(self))
+


  @dataclass
  class CommitInfo(str):
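
Note: these info classes switch from `TypedDict` to dataclasses that also subclass `dict`, so code that indexed them as plain dictionaries keeps working while attribute access becomes available. A minimal sketch of the intended behavior (the field values below are made up):

    >>> info = BlobLfsInfo(size=1024, sha256="abc123", pointer_size=134)
    >>> info.size          # attribute-style access on the dataclass
    1024
    >>> info["sha256"]     # dict-style access still works, thanks to __post_init__
    'abc123'
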
@@ -656,7 +675,7 @@ class ModelInfo:
              ModelCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
          )

-         self.widget_data = kwargs.pop("widget_data", None)
+         self.widget_data = kwargs.pop("widgetData", None)
          self.model_index = kwargs.pop("model-index", None) or kwargs.pop("model_index", None)
          self.config = kwargs.pop("config", None)
          transformers_info = kwargs.pop("transformersInfo", None) or kwargs.pop("transformers_info", None)
@@ -1094,7 +1113,7 @@ class GitRefs:
      branches: List[GitRefInfo]
      converts: List[GitRefInfo]
      tags: List[GitRefInfo]
-     pull_requests: Optional[List[GitRefInfo]]
+     pull_requests: Optional[List[GitRefInfo]] = None


  @dataclass
@@ -1361,6 +1380,12 @@ class HfApi:
          *,
          filter: Union[ModelFilter, str, Iterable[str], None] = None,
          author: Optional[str] = None,
+         library: Optional[Union[str, List[str]]] = None,
+         language: Optional[Union[str, List[str]]] = None,
+         model_name: Optional[str] = None,
+         task: Optional[Union[str, List[str]]] = None,
+         trained_dataset: Optional[Union[str, List[str]]] = None,
+         tags: Optional[Union[str, List[str]]] = None,
          search: Optional[str] = None,
          emissions_thresholds: Optional[Tuple[float, float]] = None,
          sort: Union[Literal["last_modified"], str, None] = None,
@@ -1370,6 +1395,7 @@ class HfApi:
          cardData: bool = False,
          fetch_config: bool = False,
          token: Optional[Union[bool, str]] = None,
+         pipeline_tag: Optional[str] = None,
      ) -> Iterable[ModelInfo]:
          """
          List models hosted on the Huggingface Hub, given some filters.
@@ -1381,6 +1407,24 @@ class HfApi:
              author (`str`, *optional*):
                  A string which identify the author (user or organization) of the
                  returned models
+             library (`str` or `List`, *optional*):
+                 A string or list of strings of foundational libraries models were
+                 originally trained from, such as pytorch, tensorflow, or allennlp.
+             language (`str` or `List`, *optional*):
+                 A string or list of strings of languages, both by name and country
+                 code, such as "en" or "English"
+             model_name (`str`, *optional*):
+                 A string that contain complete or partial names for models on the
+                 Hub, such as "bert" or "bert-base-cased"
+             task (`str` or `List`, *optional*):
+                 A string or list of strings of tasks models were designed for, such
+                 as: "fill-mask" or "automatic-speech-recognition"
+             trained_dataset (`str` or `List`, *optional*):
+                 A string tag or a list of string tags of the trained dataset for a
+                 model on the Hub.
+             tags (`str` or `List`, *optional*):
+                 A string tag or a list of tags to filter models on the Hub by, such
+                 as `text-generation` or `spacy`.
              search (`str`, *optional*):
                  A string that will be contained in the returned model ids.
              emissions_thresholds (`Tuple`, *optional*):
@@ -1411,6 +1455,9 @@ class HfApi:
                  If `None` or `True` and machine is logged in (through `huggingface-cli login`
                  or [`~huggingface_hub.login`]), token will be retrieved from the cache.
                  If `False`, token is not sent in the request header.
+             pipeline_tag (`str`, *optional*):
+                 A string pipeline tag to filter models on the Hub by, such as `summarization`
+

          Returns:
              `Iterable[ModelInfo]`: an iterable of [`huggingface_hub.hf_api.ModelInfo`] objects.
@@ -1427,9 +1474,6 @@ class HfApi:

          >>> # List only the text classification models
          >>> api.list_models(filter="text-classification")
-         >>> # Using the `ModelFilter`
-         >>> filt = ModelFilter(task="text-classification")
-

          >>> # List only models from the AllenNLP library
          >>> api.list_models(filter="allennlp")
@@ -1455,15 +1499,38 @@ class HfApi:
          path = f"{self.endpoint}/api/models"
          headers = self._build_hf_headers(token=token)
          params = {}
+         filter_list = []
+
          if filter is not None:
              if isinstance(filter, ModelFilter):
                  params = self._unpack_model_filter(filter)
              else:
                  params.update({"filter": filter})
+
          params.update({"full": True})
-         if author is not None:
+
+         # Build the filter list
+         if author:
              params.update({"author": author})
-         if search is not None:
+         if model_name:
+             params.update({"search": model_name})
+         if library:
+             filter_list.extend([library] if isinstance(library, str) else library)
+         if task:
+             filter_list.extend([task] if isinstance(task, str) else task)
+         if trained_dataset:
+             if not isinstance(trained_dataset, (list, tuple)):
+                 trained_dataset = [trained_dataset]
+             for dataset in trained_dataset:
+                 if not dataset.startswith("dataset:"):
+                     dataset = f"dataset:{dataset}"
+                 filter_list.append(dataset)
+         if language:
+             filter_list.extend([language] if isinstance(language, str) else language)
+         if tags:
+             filter_list.extend([tags] if isinstance(tags, str) else tags)
+
+         if search:
              params.update({"search": search})
          if sort is not None:
              params.update({"sort": "lastModified" if sort == "last_modified" else sort})
@@ -1480,6 +1547,13 @@ class HfApi:
              params.update({"config": True})
          if cardData:
              params.update({"cardData": True})
+         if pipeline_tag:
+             params.update({"pipeline_tag": pipeline_tag})
+
+         filter_value = params.get("filter", [])
+         if filter_value:
+             filter_list.extend([filter_value] if isinstance(filter_value, str) else list(filter_value))
+         params.update({"filter": filter_list})

          # `items` is a generator
          items = paginate(path, params=params, headers=headers)
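
Note: together with the docstring changes above, the new keyword arguments let callers filter models without building a `ModelFilter`; most of them are folded into the `filter` query parameter, while `pipeline_tag` is sent as its own parameter. A hedged usage sketch (the model queries below are illustrative):

    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.list_models(library="pytorch", task="fill-mask", tags="bert", limit=5)
    >>> api.list_models(pipeline_tag="summarization", limit=5)
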
@@ -1499,21 +1573,21 @@ class HfApi:
          model_str = ""

          # Handling author
-         if model_filter.author is not None:
+         if model_filter.author:
              model_str = f"{model_filter.author}/"

          # Handling model_name
-         if model_filter.model_name is not None:
+         if model_filter.model_name:
              model_str += model_filter.model_name

          filter_list: List[str] = []

          # Handling tasks
-         if model_filter.task is not None:
+         if model_filter.task:
              filter_list.extend([model_filter.task] if isinstance(model_filter.task, str) else model_filter.task)

          # Handling dataset
-         if model_filter.trained_dataset is not None:
+         if model_filter.trained_dataset:
              if not isinstance(model_filter.trained_dataset, (list, tuple)):
                  model_filter.trained_dataset = [model_filter.trained_dataset]
              for dataset in model_filter.trained_dataset:
@@ -1532,7 +1606,7 @@ class HfApi:
              filter_list.extend([model_filter.tags] if isinstance(model_filter.tags, str) else model_filter.tags)

          query_dict: Dict[str, Any] = {}
-         if model_str is not None:
+         if model_str:
              query_dict["search"] = model_str
          if isinstance(model_filter.language, list):
              filter_list.extend(model_filter.language)
@@ -1547,8 +1621,16 @@ class HfApi:
          *,
          filter: Union[DatasetFilter, str, Iterable[str], None] = None,
          author: Optional[str] = None,
+         benchmark: Optional[Union[str, List[str]]] = None,
+         dataset_name: Optional[str] = None,
+         language_creators: Optional[Union[str, List[str]]] = None,
+         language: Optional[Union[str, List[str]]] = None,
+         multilinguality: Optional[Union[str, List[str]]] = None,
+         size_categories: Optional[Union[str, List[str]]] = None,
+         task_categories: Optional[Union[str, List[str]]] = None,
+         task_ids: Optional[Union[str, List[str]]] = None,
          search: Optional[str] = None,
-         sort: Union[Literal["last_modified"], str, None] = None,
+         sort: Optional[Union[Literal["last_modified"], str]] = None,
          direction: Optional[Literal[-1]] = None,
          limit: Optional[int] = None,
          full: Optional[bool] = None,
@@ -1563,6 +1645,34 @@ class HfApi:
                  datasets on the hub.
              author (`str`, *optional*):
                  A string which identify the author of the returned datasets.
+             benchmark (`str` or `List`, *optional*):
+                 A string or list of strings that can be used to identify datasets on
+                 the Hub by their official benchmark.
+             dataset_name (`str`, *optional*):
+                 A string or list of strings that can be used to identify datasets on
+                 the Hub by its name, such as `SQAC` or `wikineural`
+             language_creators (`str` or `List`, *optional*):
+                 A string or list of strings that can be used to identify datasets on
+                 the Hub with how the data was curated, such as `crowdsourced` or
+                 `machine_generated`.
+             language (`str` or `List`, *optional*):
+                 A string or list of strings representing a two-character language to
+                 filter datasets by on the Hub.
+             multilinguality (`str` or `List`, *optional*):
+                 A string or list of strings representing a filter for datasets that
+                 contain multiple languages.
+             size_categories (`str` or `List`, *optional*):
+                 A string or list of strings that can be used to identify datasets on
+                 the Hub by the size of the dataset such as `100K<n<1M` or
+                 `1M<n<10M`.
+             task_categories (`str` or `List`, *optional*):
+                 A string or list of strings that can be used to identify datasets on
+                 the Hub by the designed task, such as `audio_classification` or
+                 `named_entity_recognition`.
+             task_ids (`str` or `List`, *optional*):
+                 A string or list of strings that can be used to identify datasets on
+                 the Hub by the specific task such as `speech_emotion_recognition` or
+                 `paraphrase`.
              search (`str`, *optional*):
                  A string that will be contained in the returned datasets.
              sort (`Literal["last_modified"]` or `str`, *optional*):
@@ -1600,16 +1710,12 @@ class HfApi:

          >>> # List only the text classification datasets
          >>> api.list_datasets(filter="task_categories:text-classification")
-         >>> # Using the `DatasetFilter`
-         >>> filt = DatasetFilter(task_categories="text-classification")


          >>> # List only the datasets in russian for language modeling
          >>> api.list_datasets(
          ...     filter=("language:ru", "task_ids:language-modeling")
          ... )
-         >>> # Using the `DatasetFilter`
-         >>> filt = DatasetFilter(language="ru", task_ids="language-modeling")

          >>> api.list_datasets(filter=filt)
          ```
@@ -1631,14 +1737,38 @@ class HfApi:
          path = f"{self.endpoint}/api/datasets"
          headers = self._build_hf_headers(token=token)
          params = {}
+         filter_list = []
+
          if filter is not None:
              if isinstance(filter, DatasetFilter):
                  params = self._unpack_dataset_filter(filter)
              else:
                  params.update({"filter": filter})
-         if author is not None:
+
+         # Build the filter list
+         if author:
              params.update({"author": author})
-         if search is not None:
+         if dataset_name:
+             params.update({"search": dataset_name})
+
+         for attr in (
+             benchmark,
+             language_creators,
+             language,
+             multilinguality,
+             size_categories,
+             task_categories,
+             task_ids,
+         ):
+             if attr:
+                 if not isinstance(attr, (list, tuple)):
+                     attr = [attr]
+                 for data in attr:
+                     if not data.startswith(f"{attr}:"):
+                         data = f"{attr}:{data}"
+                     filter_list.append(data)
+
+         if search:
              params.update({"search": search})
          if sort is not None:
              params.update({"sort": "lastModified" if sort == "last_modified" else sort})
@@ -1649,6 +1779,11 @@ class HfApi:
          if full:
              params.update({"full": True})

+         filter_value = params.get("filter", [])
+         if filter_value:
+             filter_list.extend([filter_value] if isinstance(filter_value, str) else list(filter_value))
+         params.update({"filter": filter_list})
+
          items = paginate(path, params=params, headers=headers)
          if limit is not None:
              items = islice(items, limit)  # Do not iterate over all pages
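
Note: `list_datasets` gets the same treatment, so a `DatasetFilter` is no longer required for common queries. A hedged sketch (the dataset queries below are illustrative):

    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.list_datasets(language="ru", task_ids="language-modeling", limit=5)
    >>> api.list_datasets(dataset_name="wikineural", size_categories="100K<n<1M", limit=5)
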
@@ -1664,11 +1799,11 @@ class HfApi:
          dataset_str = ""

          # Handling author
-         if dataset_filter.author is not None:
+         if dataset_filter.author:
              dataset_str = f"{dataset_filter.author}/"

          # Handling dataset_name
-         if dataset_filter.dataset_name is not None:
+         if dataset_filter.dataset_name:
              dataset_str += dataset_filter.dataset_name

          filter_list = []
@@ -2307,18 +2442,14 @@ class HfApi:
          Returns:
              True if the repository exists, False otherwise.

-         <Tip>
-
          Examples:
              ```py
              >>> from huggingface_hub import repo_exists
-             >>> repo_exists("huggingface/transformers")
+             >>> repo_exists("google/gemma-7b")
              True
-             >>> repo_exists("huggingface/not-a-repo")
+             >>> repo_exists("google/not-a-repo")
              False
              ```
-
-         </Tip>
          """
          try:
              self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token)
@@ -2328,6 +2459,53 @@ class HfApi:
          except RepositoryNotFoundError:
              return False

+     @validate_hf_hub_args
+     def revision_exists(
+         self,
+         repo_id: str,
+         revision: str,
+         *,
+         repo_type: Optional[str] = None,
+         token: Optional[str] = None,
+     ) -> bool:
+         """
+         Checks if a specific revision exists on a repo on the Hugging Face Hub.
+
+         Args:
+             repo_id (`str`):
+                 A namespace (user or an organization) and a repo name separated
+                 by a `/`.
+             revision (`str`):
+                 The revision of the repository to check.
+             repo_type (`str`, *optional*):
+                 Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
+                 `None` or `"model"` if getting repository info from a model. Default is `None`.
+             token (`bool` or `str`, *optional*):
+                 A valid authentication token (see https://huggingface.co/settings/token).
+                 If `None` or `True` and machine is logged in (through `huggingface-cli login`
+                 or [`~huggingface_hub.login`]), token will be retrieved from the cache.
+                 If `False`, token is not sent in the request header.
+
+         Returns:
+             True if the repository and the revision exists, False otherwise.
+
+         Examples:
+             ```py
+             >>> from huggingface_hub import revision_exists
+             >>> revision_exists("google/gemma-7b", "float16")
+             True
+             >>> revision_exists("google/gemma-7b", "not-a-revision")
+             False
+             ```
+         """
+         try:
+             self.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token)
+             return True
+         except RevisionNotFoundError:
+             return False
+         except RepositoryNotFoundError:
+             return False
+
      @validate_hf_hub_args
      def file_exists(
          self,
@@ -2362,8 +2540,6 @@ class HfApi:
          Returns:
              True if the file exists, False otherwise.

-         <Tip>
-
          Examples:
              ```py
              >>> from huggingface_hub import file_exists
@@ -2374,8 +2550,6 @@ class HfApi:
              >>> file_exists("bigcode/not-a-repo", "config.json")
              False
              ```
-
-         </Tip>
          """
          url = hf_hub_url(
              repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=self.endpoint
@@ -3243,6 +3417,9 @@ class HfApi:
              raise

      @validate_hf_hub_args
+     @_deprecate_arguments(
+         version="0.24.0", deprecated_args=("organization", "name"), custom_message="Use `repo_id` instead."
+     )
      def update_repo_visibility(
          self,
          repo_id: str,
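
Note: with this decorator, passing `organization`/`name` to `update_repo_visibility` still works but emits a deprecation warning until removal in 0.24.0. A hedged sketch of the preferred call (the repo id below is hypothetical):

    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.update_repo_visibility(repo_id="my-user/my-model", private=True)  # hypothetical repo
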
@@ -3510,7 +3687,8 @@ class HfApi:
              [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                  If parent commit is not a valid commit OID.
              [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
-                 If the Hub API returns an HTTP 400 error (bad request)
+                 If a README.md file with an invalid metadata section is committed. In this case, the commit will fail
+                 early, before trying to upload any file.
              [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                  If `create_pr` is `True` and revision is neither `None` nor `"main"`.
              [`~utils.RepositoryNotFoundError`]:
@@ -3552,6 +3730,32 @@ class HfApi:
                  f" {nb_deletions} deletion(s)."
              )

+         # If updating a README.md file, make sure the metadata format is valid
+         # It's better to fail early than to fail after all the files have been uploaded.
+         for addition in additions:
+             if addition.path_in_repo == "README.md":
+                 with addition.as_file() as file:
+                     response = get_session().post(
+                         f"{ENDPOINT}/api/validate-yaml",
+                         json={"content": file.read().decode(), "repoType": repo_type},
+                         headers=self._build_hf_headers(token=token),
+                     )
+                 # Handle warnings (example: empty metadata)
+                 response_content = response.json()
+                 message = "\n".join(
+                     [f"- {warning.get('message')}" for warning in response_content.get("warnings", [])]
+                 )
+                 if message:
+                     warnings.warn(f"Warnings while validating metadata in README.md:\n{message}")
+
+                 # Raise on errors
+                 try:
+                     hf_raise_for_status(response)
+                 except BadRequestError as e:
+                     errors = response_content.get("errors", [])
+                     message = "\n".join([f"- {error.get('message')}" for error in errors])
+                     raise ValueError(f"Invalid metadata in README.md.\n{message}") from e
+
          # If updating twice the same file or update then delete a file in a single commit
          _warn_on_overwriting_operations(operations)

@@ -3565,7 +3769,7 @@ class HfApi:
              num_threads=num_threads,
              free_memory=False,  # do not remove `CommitOperationAdd.path_or_fileobj` on LFS files for "normal" users
          )
-         files_to_copy = _fetch_lfs_files_to_copy(
+         files_to_copy = _fetch_files_to_copy(
              copies=copies,
              repo_type=repo_type,
              repo_id=repo_id,
@@ -5145,7 +5349,7 @@ class HfApi:
          safetensors file, we parse the metadata from this file. If it's a sharded safetensors repo, we parse the
          metadata from the index file and then parse the metadata from each shard.

-         To parse metadata from a single safetensors file, use [`get_safetensors_metadata`].
+         To parse metadata from a single safetensors file, use [`parse_safetensors_file_metadata`].

          For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.

@@ -5688,18 +5892,19 @@ class HfApi:
              raise ValueError(f"Invalid discussion_status, must be one of {DISCUSSION_STATUS}")

          headers = self._build_hf_headers(token=token)
-         query_dict: Dict[str, str] = {}
+         path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions"
+
+         params: Dict[str, Union[str, int]] = {}
          if discussion_type is not None:
-             query_dict["type"] = discussion_type
+             params["type"] = discussion_type
          if discussion_status is not None:
-             query_dict["status"] = discussion_status
+             params["status"] = discussion_status
          if author is not None:
-             query_dict["author"] = author
+             params["author"] = author

          def _fetch_discussion_page(page_index: int):
-             query_string = urlencode({**query_dict, "page_index": page_index})
-             path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions?{query_string}"
-             resp = get_session().get(path, headers=headers)
+             params["p"] = page_index
+             resp = get_session().get(path, headers=headers, params=params)
              hf_raise_for_status(resp)
              paginated_discussions = resp.json()
              total = paginated_discussions["count"]
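
Note: this refactor is inside `get_repo_discussions`, which now passes its filters through requests' `params` instead of building the query string by hand. A hedged sketch of filtering discussions (the repo id below is illustrative):

    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> for discussion in api.get_repo_discussions(
    ...     repo_id="bigscience/bloom", discussion_type="pull_request", discussion_status="open"
    ... ):
    ...     print(discussion.num, discussion.title)
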
@@ -7633,6 +7838,15 @@ class HfApi:

          Returns: [`Collection`]

+         Raises:
+             `HTTPError`:
+                 HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
+                 or `admin` role in the organization the repo belongs to or if you passed a `read` token.
+             `HTTPError`:
+                 HTTP 404 if the item you try to add to the collection does not exist on the Hub.
+             `HTTPError`:
+                 HTTP 409 if the item you try to add to the collection is already in the collection (and exists_ok=False)
+
          Example:

          ```py
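
Note: these Raises entries document `add_collection_item`. A hedged sketch of avoiding the HTTP 409 case by passing `exists_ok=True` (the collection slug below is hypothetical):

    >>> from huggingface_hub import HfApi
    >>> api = HfApi()
    >>> api.add_collection_item(
    ...     collection_slug="my-user/my-collection-64f9a55bb3115b4f513ec026",  # hypothetical slug
    ...     item_id="google/gemma-7b",
    ...     item_type="model",
    ...     exists_ok=True,  # do not raise if the item is already in the collection
    ... )
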
@@ -8303,6 +8517,7 @@ list_spaces = api.list_spaces
  space_info = api.space_info

  repo_exists = api.repo_exists
+ revision_exists = api.revision_exists
  file_exists = api.file_exists
  repo_info = api.repo_info
  list_repo_files = api.list_repo_files