huggingface-hub 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +19 -1
- huggingface_hub/_commit_api.py +49 -20
- huggingface_hub/_inference_endpoints.py +10 -0
- huggingface_hub/_login.py +2 -2
- huggingface_hub/commands/download.py +1 -1
- huggingface_hub/file_download.py +57 -21
- huggingface_hub/hf_api.py +269 -54
- huggingface_hub/hf_file_system.py +131 -8
- huggingface_hub/hub_mixin.py +204 -42
- huggingface_hub/inference/_client.py +56 -9
- huggingface_hub/inference/_common.py +4 -3
- huggingface_hub/inference/_generated/_async_client.py +57 -9
- huggingface_hub/inference/_text_generation.py +5 -0
- huggingface_hub/inference/_types.py +17 -0
- huggingface_hub/lfs.py +6 -3
- huggingface_hub/repocard.py +5 -3
- huggingface_hub/repocard_data.py +11 -3
- huggingface_hub/serialization/__init__.py +19 -0
- huggingface_hub/serialization/_base.py +168 -0
- huggingface_hub/serialization/_numpy.py +67 -0
- huggingface_hub/serialization/_tensorflow.py +93 -0
- huggingface_hub/serialization/_torch.py +199 -0
- huggingface_hub/templates/datasetcard_template.md +1 -1
- huggingface_hub/templates/modelcard_template.md +1 -4
- huggingface_hub/utils/__init__.py +14 -10
- huggingface_hub/utils/_datetime.py +4 -11
- huggingface_hub/utils/_errors.py +29 -0
- huggingface_hub/utils/_runtime.py +21 -15
- huggingface_hub/utils/endpoint_helpers.py +27 -1
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/METADATA +7 -3
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/RECORD +35 -30
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/top_level.txt +0 -0
huggingface_hub/hf_api.py
CHANGED
````diff
@@ -20,7 +20,7 @@ import re
 import struct
 import warnings
 from concurrent.futures import Future, ThreadPoolExecutor
-from dataclasses import dataclass, field
+from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from functools import wraps
 from itertools import islice
@@ -36,12 +36,11 @@ from typing import (
     Literal,
     Optional,
     Tuple,
-    TypedDict,
     TypeVar,
     Union,
     overload,
 )
-from urllib.parse import quote
+from urllib.parse import quote
 
 import requests
 from requests.exceptions import HTTPError
@@ -53,7 +52,7 @@ from ._commit_api import (
     CommitOperationAdd,
     CommitOperationCopy,
     CommitOperationDelete,
-
+    _fetch_files_to_copy,
     _fetch_upload_modes,
     _prepare_commit_payload,
     _upload_lfs_files,
@@ -130,7 +129,7 @@ from .utils import ( # noqa: F401 # imported for backward compatibility
     validate_hf_hub_args,
 )
 from .utils import tqdm as hf_tqdm
-from .utils._deprecation import _deprecate_method
+from .utils._deprecation import _deprecate_arguments, _deprecate_method
 from .utils._typing import CallableT
 from .utils.endpoint_helpers import (
     DatasetFilter,
@@ -238,36 +237,56 @@ def repo_type_and_id_from_hf_id(hf_id: str, hub_url: Optional[str] = None) -> Tu
     return repo_type, namespace, repo_id
 
 
-
+@dataclass
+class LastCommitInfo(dict):
     oid: str
     title: str
     date: datetime
 
+    def __post_init__(self):  # hack to make LastCommitInfo backward compatible
+        self.update(asdict(self))
+
 
-
+@dataclass
+class BlobLfsInfo(dict):
     size: int
     sha256: str
     pointer_size: int
 
+    def __post_init__(self):  # hack to make BlobLfsInfo backward compatible
+        self.update(asdict(self))
 
-
+
+@dataclass
+class BlobSecurityInfo(dict):
     safe: bool
     av_scan: Optional[Dict]
     pickle_import_scan: Optional[Dict]
 
+    def __post_init__(self):  # hack to make BlogSecurityInfo backward compatible
+        self.update(asdict(self))
+
 
-
+@dataclass
+class TransformersInfo(dict):
     auto_model: str
-    custom_class: Optional[str]
+    custom_class: Optional[str] = None
     # possible `pipeline_tag` values: https://github.com/huggingface/huggingface.js/blob/3ee32554b8620644a6287e786b2a83bf5caf559c/packages/tasks/src/pipelines.ts#L72
-    pipeline_tag: Optional[str]
-    processor: Optional[str]
+    pipeline_tag: Optional[str] = None
+    processor: Optional[str] = None
+
+    def __post_init__(self):  # hack to make TransformersInfo backward compatible
+        self.update(asdict(self))
 
 
-
+@dataclass
+class SafeTensorsInfo(dict):
     parameters: List[Dict[str, int]]
     total: int
 
+    def __post_init__(self):  # hack to make SafeTensorsInfo backward compatible
+        self.update(asdict(self))
+
 
 @dataclass
 class CommitInfo(str):
````
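The info classes above switch from `TypedDict` to `dict`-backed dataclasses, with `__post_init__` copying the fields into the dict so both attribute access and key access keep working. A minimal sketch of that pattern, reusing `LastCommitInfo` exactly as shown in the hunk (the example values are made up):

```python
from dataclasses import asdict, dataclass
from datetime import datetime


@dataclass
class LastCommitInfo(dict):
    oid: str
    title: str
    date: datetime

    def __post_init__(self):  # copy dataclass fields into the dict for backward compatibility
        self.update(asdict(self))


info = LastCommitInfo(oid="abc123", title="Initial commit", date=datetime(2024, 2, 21))
print(info.title)     # attribute access (new dataclass style)
print(info["title"])  # key access (old TypedDict-style callers keep working)
```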
````diff
@@ -656,7 +675,7 @@ class ModelInfo:
             ModelCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data
         )
 
-        self.widget_data = kwargs.pop("
+        self.widget_data = kwargs.pop("widgetData", None)
         self.model_index = kwargs.pop("model-index", None) or kwargs.pop("model_index", None)
         self.config = kwargs.pop("config", None)
         transformers_info = kwargs.pop("transformersInfo", None) or kwargs.pop("transformers_info", None)
@@ -1094,7 +1113,7 @@ class GitRefs:
     branches: List[GitRefInfo]
     converts: List[GitRefInfo]
     tags: List[GitRefInfo]
-    pull_requests: Optional[List[GitRefInfo]]
+    pull_requests: Optional[List[GitRefInfo]] = None
 
 
 @dataclass
@@ -1361,6 +1380,12 @@ class HfApi:
         *,
         filter: Union[ModelFilter, str, Iterable[str], None] = None,
         author: Optional[str] = None,
+        library: Optional[Union[str, List[str]]] = None,
+        language: Optional[Union[str, List[str]]] = None,
+        model_name: Optional[str] = None,
+        task: Optional[Union[str, List[str]]] = None,
+        trained_dataset: Optional[Union[str, List[str]]] = None,
+        tags: Optional[Union[str, List[str]]] = None,
         search: Optional[str] = None,
         emissions_thresholds: Optional[Tuple[float, float]] = None,
         sort: Union[Literal["last_modified"], str, None] = None,
@@ -1370,6 +1395,7 @@ class HfApi:
         cardData: bool = False,
         fetch_config: bool = False,
         token: Optional[Union[bool, str]] = None,
+        pipeline_tag: Optional[str] = None,
     ) -> Iterable[ModelInfo]:
         """
         List models hosted on the Huggingface Hub, given some filters.
@@ -1381,6 +1407,24 @@ class HfApi:
             author (`str`, *optional*):
                 A string which identify the author (user or organization) of the
                 returned models
+            library (`str` or `List`, *optional*):
+                A string or list of strings of foundational libraries models were
+                originally trained from, such as pytorch, tensorflow, or allennlp.
+            language (`str` or `List`, *optional*):
+                A string or list of strings of languages, both by name and country
+                code, such as "en" or "English"
+            model_name (`str`, *optional*):
+                A string that contain complete or partial names for models on the
+                Hub, such as "bert" or "bert-base-cased"
+            task (`str` or `List`, *optional*):
+                A string or list of strings of tasks models were designed for, such
+                as: "fill-mask" or "automatic-speech-recognition"
+            trained_dataset (`str` or `List`, *optional*):
+                A string tag or a list of string tags of the trained dataset for a
+                model on the Hub.
+            tags (`str` or `List`, *optional*):
+                A string tag or a list of tags to filter models on the Hub by, such
+                as `text-generation` or `spacy`.
             search (`str`, *optional*):
                 A string that will be contained in the returned model ids.
             emissions_thresholds (`Tuple`, *optional*):
@@ -1411,6 +1455,9 @@ class HfApi:
                 If `None` or `True` and machine is logged in (through `huggingface-cli login`
                 or [`~huggingface_hub.login`]), token will be retrieved from the cache.
                 If `False`, token is not sent in the request header.
+            pipeline_tag (`str`, *optional*):
+                A string pipeline tag to filter models on the Hub by, such as `summarization`
+
 
         Returns:
             `Iterable[ModelInfo]`: an iterable of [`huggingface_hub.hf_api.ModelInfo`] objects.
@@ -1427,9 +1474,6 @@ class HfApi:
 
         >>> # List only the text classification models
         >>> api.list_models(filter="text-classification")
-        >>> # Using the `ModelFilter`
-        >>> filt = ModelFilter(task="text-classification")
-
 
         >>> # List only models from the AllenNLP library
         >>> api.list_models(filter="allennlp")
@@ -1455,15 +1499,38 @@ class HfApi:
         path = f"{self.endpoint}/api/models"
         headers = self._build_hf_headers(token=token)
         params = {}
+        filter_list = []
+
         if filter is not None:
             if isinstance(filter, ModelFilter):
                 params = self._unpack_model_filter(filter)
             else:
                 params.update({"filter": filter})
+
         params.update({"full": True})
-
+
+        # Build the filter list
+        if author:
             params.update({"author": author})
-        if
+        if model_name:
+            params.update({"search": model_name})
+        if library:
+            filter_list.extend([library] if isinstance(library, str) else library)
+        if task:
+            filter_list.extend([task] if isinstance(task, str) else task)
+        if trained_dataset:
+            if not isinstance(trained_dataset, (list, tuple)):
+                trained_dataset = [trained_dataset]
+            for dataset in trained_dataset:
+                if not dataset.startswith("dataset:"):
+                    dataset = f"dataset:{dataset}"
+                filter_list.append(dataset)
+        if language:
+            filter_list.extend([language] if isinstance(language, str) else language)
+        if tags:
+            filter_list.extend([tags] if isinstance(tags, str) else tags)
+
+        if search:
             params.update({"search": search})
         if sort is not None:
             params.update({"sort": "lastModified" if sort == "last_modified" else sort})
@@ -1480,6 +1547,13 @@ class HfApi:
             params.update({"config": True})
         if cardData:
             params.update({"cardData": True})
+        if pipeline_tag:
+            params.update({"pipeline_tag": pipeline_tag})
+
+        filter_value = params.get("filter", [])
+        if filter_value:
+            filter_list.extend([filter_value] if isinstance(filter_value, str) else list(filter_value))
+        params.update({"filter": filter_list})
 
         # `items` is a generator
         items = paginate(path, params=params, headers=headers)
````
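Taken together, the `list_models` hunks above add first-class keyword filters (plus `pipeline_tag`) that are folded into the `filter` query parameter. A usage sketch with illustrative values; the keyword names come from the new signature above and `limit` is an existing parameter:

```python
from huggingface_hub import HfApi

api = HfApi()

# Illustrative values; each keyword feeds the filter list built in the hunk above.
models = api.list_models(
    task="fill-mask",
    library="pytorch",
    trained_dataset="wikipedia",  # expanded to "dataset:wikipedia" by the new code
    limit=5,
)
for model in models:
    print(model.id)
```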
````diff
@@ -1499,21 +1573,21 @@ class HfApi:
         model_str = ""
 
         # Handling author
-        if model_filter.author
+        if model_filter.author:
             model_str = f"{model_filter.author}/"
 
         # Handling model_name
-        if model_filter.model_name
+        if model_filter.model_name:
             model_str += model_filter.model_name
 
         filter_list: List[str] = []
 
         # Handling tasks
-        if model_filter.task
+        if model_filter.task:
             filter_list.extend([model_filter.task] if isinstance(model_filter.task, str) else model_filter.task)
 
         # Handling dataset
-        if model_filter.trained_dataset
+        if model_filter.trained_dataset:
             if not isinstance(model_filter.trained_dataset, (list, tuple)):
                 model_filter.trained_dataset = [model_filter.trained_dataset]
             for dataset in model_filter.trained_dataset:
@@ -1532,7 +1606,7 @@ class HfApi:
             filter_list.extend([model_filter.tags] if isinstance(model_filter.tags, str) else model_filter.tags)
 
         query_dict: Dict[str, Any] = {}
-        if model_str
+        if model_str:
             query_dict["search"] = model_str
         if isinstance(model_filter.language, list):
             filter_list.extend(model_filter.language)
@@ -1547,8 +1621,16 @@ class HfApi:
         *,
         filter: Union[DatasetFilter, str, Iterable[str], None] = None,
         author: Optional[str] = None,
+        benchmark: Optional[Union[str, List[str]]] = None,
+        dataset_name: Optional[str] = None,
+        language_creators: Optional[Union[str, List[str]]] = None,
+        language: Optional[Union[str, List[str]]] = None,
+        multilinguality: Optional[Union[str, List[str]]] = None,
+        size_categories: Optional[Union[str, List[str]]] = None,
+        task_categories: Optional[Union[str, List[str]]] = None,
+        task_ids: Optional[Union[str, List[str]]] = None,
         search: Optional[str] = None,
-        sort: Union[Literal["last_modified"], str
+        sort: Optional[Union[Literal["last_modified"], str]] = None,
         direction: Optional[Literal[-1]] = None,
         limit: Optional[int] = None,
         full: Optional[bool] = None,
@@ -1563,6 +1645,34 @@ class HfApi:
                 datasets on the hub.
             author (`str`, *optional*):
                 A string which identify the author of the returned datasets.
+            benchmark (`str` or `List`, *optional*):
+                A string or list of strings that can be used to identify datasets on
+                the Hub by their official benchmark.
+            dataset_name (`str`, *optional*):
+                A string or list of strings that can be used to identify datasets on
+                the Hub by its name, such as `SQAC` or `wikineural`
+            language_creators (`str` or `List`, *optional*):
+                A string or list of strings that can be used to identify datasets on
+                the Hub with how the data was curated, such as `crowdsourced` or
+                `machine_generated`.
+            language (`str` or `List`, *optional*):
+                A string or list of strings representing a two-character language to
+                filter datasets by on the Hub.
+            multilinguality (`str` or `List`, *optional*):
+                A string or list of strings representing a filter for datasets that
+                contain multiple languages.
+            size_categories (`str` or `List`, *optional*):
+                A string or list of strings that can be used to identify datasets on
+                the Hub by the size of the dataset such as `100K<n<1M` or
+                `1M<n<10M`.
+            task_categories (`str` or `List`, *optional*):
+                A string or list of strings that can be used to identify datasets on
+                the Hub by the designed task, such as `audio_classification` or
+                `named_entity_recognition`.
+            task_ids (`str` or `List`, *optional*):
+                A string or list of strings that can be used to identify datasets on
+                the Hub by the specific task such as `speech_emotion_recognition` or
+                `paraphrase`.
             search (`str`, *optional*):
                 A string that will be contained in the returned datasets.
             sort (`Literal["last_modified"]` or `str`, *optional*):
@@ -1600,16 +1710,12 @@ class HfApi:
 
         >>> # List only the text classification datasets
         >>> api.list_datasets(filter="task_categories:text-classification")
-        >>> # Using the `DatasetFilter`
-        >>> filt = DatasetFilter(task_categories="text-classification")
 
 
         >>> # List only the datasets in russian for language modeling
         >>> api.list_datasets(
         ...     filter=("language:ru", "task_ids:language-modeling")
         ... )
-        >>> # Using the `DatasetFilter`
-        >>> filt = DatasetFilter(language="ru", task_ids="language-modeling")
 
         >>> api.list_datasets(filter=filt)
         ```
@@ -1631,14 +1737,38 @@ class HfApi:
         path = f"{self.endpoint}/api/datasets"
         headers = self._build_hf_headers(token=token)
         params = {}
+        filter_list = []
+
         if filter is not None:
             if isinstance(filter, DatasetFilter):
                 params = self._unpack_dataset_filter(filter)
             else:
                 params.update({"filter": filter})
-
+
+        # Build the filter list
+        if author:
             params.update({"author": author})
-        if
+        if dataset_name:
+            params.update({"search": dataset_name})
+
+        for attr in (
+            benchmark,
+            language_creators,
+            language,
+            multilinguality,
+            size_categories,
+            task_categories,
+            task_ids,
+        ):
+            if attr:
+                if not isinstance(attr, (list, tuple)):
+                    attr = [attr]
+                for data in attr:
+                    if not data.startswith(f"{attr}:"):
+                        data = f"{attr}:{data}"
+                    filter_list.append(data)
+
+        if search:
             params.update({"search": search})
         if sort is not None:
             params.update({"sort": "lastModified" if sort == "last_modified" else sort})
@@ -1649,6 +1779,11 @@ class HfApi:
         if full:
             params.update({"full": True})
 
+        filter_value = params.get("filter", [])
+        if filter_value:
+            filter_list.extend([filter_value] if isinstance(filter_value, str) else list(filter_value))
+        params.update({"filter": filter_list})
+
         items = paginate(path, params=params, headers=headers)
         if limit is not None:
             items = islice(items, limit)  # Do not iterate over all pages
````
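`list_datasets` gets the same treatment: the new keyword filters are collected into the filter list and merged into the `filter` query parameter. A usage sketch with illustrative values; the keyword names come from the new signature above:

```python
from huggingface_hub import HfApi

api = HfApi()

# Illustrative values only.
datasets = api.list_datasets(
    language="ru",
    task_categories="text-classification",
    size_categories="100K<n<1M",
    limit=5,
)
for dataset in datasets:
    print(dataset.id)
```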
````diff
@@ -1664,11 +1799,11 @@ class HfApi:
         dataset_str = ""
 
         # Handling author
-        if dataset_filter.author
+        if dataset_filter.author:
             dataset_str = f"{dataset_filter.author}/"
 
         # Handling dataset_name
-        if dataset_filter.dataset_name
+        if dataset_filter.dataset_name:
             dataset_str += dataset_filter.dataset_name
 
         filter_list = []
@@ -2307,18 +2442,14 @@ class HfApi:
         Returns:
             True if the repository exists, False otherwise.
 
-        <Tip>
-
         Examples:
         ```py
         >>> from huggingface_hub import repo_exists
-        >>> repo_exists("
+        >>> repo_exists("google/gemma-7b")
         True
-        >>> repo_exists("
+        >>> repo_exists("google/not-a-repo")
         False
         ```
-
-        </Tip>
         """
         try:
             self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token)
@@ -2328,6 +2459,53 @@ class HfApi:
         except RepositoryNotFoundError:
             return False
 
+    @validate_hf_hub_args
+    def revision_exists(
+        self,
+        repo_id: str,
+        revision: str,
+        *,
+        repo_type: Optional[str] = None,
+        token: Optional[str] = None,
+    ) -> bool:
+        """
+        Checks if a specific revision exists on a repo on the Hugging Face Hub.
+
+        Args:
+            repo_id (`str`):
+                A namespace (user or an organization) and a repo name separated
+                by a `/`.
+            revision (`str`):
+                The revision of the repository to check.
+            repo_type (`str`, *optional*):
+                Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space,
+                `None` or `"model"` if getting repository info from a model. Default is `None`.
+            token (`bool` or `str`, *optional*):
+                A valid authentication token (see https://huggingface.co/settings/token).
+                If `None` or `True` and machine is logged in (through `huggingface-cli login`
+                or [`~huggingface_hub.login`]), token will be retrieved from the cache.
+                If `False`, token is not sent in the request header.
+
+        Returns:
+            True if the repository and the revision exists, False otherwise.
+
+        Examples:
+        ```py
+        >>> from huggingface_hub import revision_exists
+        >>> revision_exists("google/gemma-7b", "float16")
+        True
+        >>> revision_exists("google/gemma-7b", "not-a-revision")
+        False
+        ```
+        """
+        try:
+            self.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token)
+            return True
+        except RevisionNotFoundError:
+            return False
+        except RepositoryNotFoundError:
+            return False
+
     @validate_hf_hub_args
     def file_exists(
         self,
@@ -2362,8 +2540,6 @@ class HfApi:
         Returns:
             True if the file exists, False otherwise.
 
-        <Tip>
-
         Examples:
         ```py
         >>> from huggingface_hub import file_exists
@@ -2374,8 +2550,6 @@ class HfApi:
         >>> file_exists("bigcode/not-a-repo", "config.json")
         False
         ```
-
-        </Tip>
         """
         url = hf_hub_url(
             repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=self.endpoint
@@ -3243,6 +3417,9 @@ class HfApi:
             raise
 
     @validate_hf_hub_args
+    @_deprecate_arguments(
+        version="0.24.0", deprecated_args=("organization", "name"), custom_message="Use `repo_id` instead."
+    )
     def update_repo_visibility(
         self,
         repo_id: str,
````
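`update_repo_visibility` now carries a deprecation notice for `organization`/`name`, with removal planned for 0.24.0. A sketch of the recommended call shape; the repo id is a placeholder:

```python
from huggingface_hub import HfApi

api = HfApi()

# Pass repo_id directly instead of the deprecated organization/name pair.
api.update_repo_visibility(repo_id="my-username/my-model", private=True)
```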
````diff
@@ -3510,7 +3687,8 @@ class HfApi:
             [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                 If parent commit is not a valid commit OID.
             [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
-                If
+                If a README.md file with an invalid metadata section is committed. In this case, the commit will fail
+                early, before trying to upload any file.
             [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
                 If `create_pr` is `True` and revision is neither `None` nor `"main"`.
             [`~utils.RepositoryNotFoundError`]:
@@ -3552,6 +3730,32 @@ class HfApi:
                 f" {nb_deletions} deletion(s)."
             )
 
+        # If updating a README.md file, make sure the metadata format is valid
+        # It's better to fail early than to fail after all the files have been uploaded.
+        for addition in additions:
+            if addition.path_in_repo == "README.md":
+                with addition.as_file() as file:
+                    response = get_session().post(
+                        f"{ENDPOINT}/api/validate-yaml",
+                        json={"content": file.read().decode(), "repoType": repo_type},
+                        headers=self._build_hf_headers(token=token),
+                    )
+                # Handle warnings (example: empty metadata)
+                response_content = response.json()
+                message = "\n".join(
+                    [f"- {warning.get('message')}" for warning in response_content.get("warnings", [])]
+                )
+                if message:
+                    warnings.warn(f"Warnings while validating metadata in README.md:\n{message}")
+
+                # Raise on errors
+                try:
+                    hf_raise_for_status(response)
+                except BadRequestError as e:
+                    errors = response_content.get("errors", [])
+                    message = "\n".join([f"- {error.get('message')}" for error in errors])
+                    raise ValueError(f"Invalid metadata in README.md.\n{message}") from e
+
         # If updating twice the same file or update then delete a file in a single commit
         _warn_on_overwriting_operations(operations)
 
````
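`create_commit` now posts any `README.md` addition to `/api/validate-yaml` before uploading, surfacing warnings and turning validation errors into a `ValueError`. A rough sketch of what that fail-early path looks like from the caller's side; the repo id and README content are placeholders and the call assumes an authenticated client:

```python
from huggingface_hub import CommitOperationAdd, HfApi

api = HfApi()

# A README.md whose YAML front matter is invalid should now be rejected
# before any file is uploaded.
bad_readme = b"---\nlicense this is not valid yaml\n---\n# My model\n"
try:
    api.create_commit(
        repo_id="my-username/my-model",
        operations=[CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=bad_readme)],
        commit_message="Update model card",
    )
except ValueError as err:
    print(f"Commit rejected early: {err}")
```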
````diff
@@ -3565,7 +3769,7 @@ class HfApi:
             num_threads=num_threads,
             free_memory=False,  # do not remove `CommitOperationAdd.path_or_fileobj` on LFS files for "normal" users
         )
-        files_to_copy =
+        files_to_copy = _fetch_files_to_copy(
             copies=copies,
             repo_type=repo_type,
             repo_id=repo_id,
@@ -5145,7 +5349,7 @@ class HfApi:
         safetensors file, we parse the metadata from this file. If it's a sharded safetensors repo, we parse the
         metadata from the index file and then parse the metadata from each shard.
 
-        To parse metadata from a single safetensors file, use [`
+        To parse metadata from a single safetensors file, use [`parse_safetensors_file_metadata`].
 
         For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format.
 
@@ -5688,18 +5892,19 @@ class HfApi:
             raise ValueError(f"Invalid discussion_status, must be one of {DISCUSSION_STATUS}")
 
         headers = self._build_hf_headers(token=token)
-
+        path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions"
+
+        params: Dict[str, Union[str, int]] = {}
         if discussion_type is not None:
-
+            params["type"] = discussion_type
         if discussion_status is not None:
-
+            params["status"] = discussion_status
         if author is not None:
-
+            params["author"] = author
 
         def _fetch_discussion_page(page_index: int):
-
-            path =
-            resp = get_session().get(path, headers=headers)
+            params["p"] = page_index
+            resp = get_session().get(path, headers=headers, params=params)
             hf_raise_for_status(resp)
             paginated_discussions = resp.json()
             total = paginated_discussions["count"]
````
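`get_repo_discussions` now builds a `params` dict and sends `type`, `status`, `author`, and the page index as query parameters on each page request. A usage sketch; the repo id is a placeholder and the filter values are assumed to be valid choices for this method:

```python
from huggingface_hub import HfApi

api = HfApi()

# Filters are forwarded to the Hub as query parameters per the hunk above.
for discussion in api.get_repo_discussions(
    repo_id="my-username/my-model",
    discussion_type="pull_request",
    discussion_status="open",
):
    print(discussion.num, discussion.title)
```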
````diff
@@ -7633,6 +7838,15 @@ class HfApi:
 
         Returns: [`Collection`]
 
+        Raises:
+            `HTTPError`:
+                HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write`
+                or `admin` role in the organization the repo belongs to or if you passed a `read` token.
+            `HTTPError`:
+                HTTP 404 if the item you try to add to the collection does not exist on the Hub.
+            `HTTPError`:
+                HTTP 409 if the item you try to add to the collection is already in the collection (and exists_ok=False)
+
         Example:
 
         ```py
````
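The new `Raises` section documents the `add_collection_item` failure modes, including HTTP 409 when the item is already in the collection and `exists_ok=False`. A hedged sketch of the tolerant call; the collection slug and item id are placeholders:

```python
from huggingface_hub import HfApi

api = HfApi()

# With exists_ok=True, adding an item that is already in the collection
# should not raise the HTTP 409 described above.
api.add_collection_item(
    collection_slug="my-username/my-collection-65a1b2c3d4e5f6a7b8c9d0e1",
    item_id="my-username/my-model",
    item_type="model",
    exists_ok=True,
)
```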
````diff
@@ -8303,6 +8517,7 @@ list_spaces = api.list_spaces
 space_info = api.space_info
 
 repo_exists = api.repo_exists
+revision_exists = api.revision_exists
 file_exists = api.file_exists
 repo_info = api.repo_info
 list_repo_files = api.list_repo_files
````