huggingface-hub 0.29.3rc0__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +16 -1
- huggingface_hub/_commit_api.py +142 -4
- huggingface_hub/_space_api.py +15 -2
- huggingface_hub/_webhooks_server.py +2 -0
- huggingface_hub/commands/delete_cache.py +66 -20
- huggingface_hub/commands/upload.py +16 -2
- huggingface_hub/constants.py +45 -7
- huggingface_hub/errors.py +19 -0
- huggingface_hub/file_download.py +163 -35
- huggingface_hub/hf_api.py +349 -28
- huggingface_hub/hub_mixin.py +19 -4
- huggingface_hub/inference/_client.py +73 -70
- huggingface_hub/inference/_generated/_async_client.py +80 -77
- huggingface_hub/inference/_generated/types/__init__.py +1 -0
- huggingface_hub/inference/_generated/types/chat_completion.py +20 -10
- huggingface_hub/inference/_generated/types/image_to_image.py +2 -0
- huggingface_hub/inference/_providers/__init__.py +7 -1
- huggingface_hub/inference/_providers/_common.py +9 -5
- huggingface_hub/inference/_providers/black_forest_labs.py +5 -5
- huggingface_hub/inference/_providers/cohere.py +1 -1
- huggingface_hub/inference/_providers/fal_ai.py +64 -7
- huggingface_hub/inference/_providers/fireworks_ai.py +4 -1
- huggingface_hub/inference/_providers/hf_inference.py +41 -4
- huggingface_hub/inference/_providers/hyperbolic.py +3 -3
- huggingface_hub/inference/_providers/nebius.py +3 -3
- huggingface_hub/inference/_providers/novita.py +35 -5
- huggingface_hub/inference/_providers/openai.py +22 -0
- huggingface_hub/inference/_providers/replicate.py +3 -3
- huggingface_hub/inference/_providers/together.py +3 -3
- huggingface_hub/utils/__init__.py +8 -0
- huggingface_hub/utils/_http.py +4 -1
- huggingface_hub/utils/_runtime.py +11 -0
- huggingface_hub/utils/_xet.py +199 -0
- huggingface_hub/utils/tqdm.py +30 -2
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0.dist-info}/METADATA +3 -1
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0.dist-info}/RECORD +40 -38
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0.dist-info}/top_level.txt +0 -0
huggingface_hub/hf_api.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
from __future__ import annotations
|
|
16
16
|
|
|
17
17
|
import inspect
|
|
18
|
+
import io
|
|
18
19
|
import json
|
|
19
20
|
import re
|
|
20
21
|
import struct
|
|
@@ -41,7 +42,7 @@ from typing import (
|
|
|
41
42
|
Union,
|
|
42
43
|
overload,
|
|
43
44
|
)
|
|
44
|
-
from urllib.parse import quote
|
|
45
|
+
from urllib.parse import quote, unquote
|
|
45
46
|
|
|
46
47
|
import requests
|
|
47
48
|
from requests.exceptions import HTTPError
|
|
@@ -58,6 +59,7 @@ from ._commit_api import (
|
|
|
58
59
|
_fetch_upload_modes,
|
|
59
60
|
_prepare_commit_payload,
|
|
60
61
|
_upload_lfs_files,
|
|
62
|
+
_upload_xet_files,
|
|
61
63
|
_warn_on_overwriting_operations,
|
|
62
64
|
)
|
|
63
65
|
from ._inference_endpoints import InferenceEndpoint, InferenceEndpointType
|
|
@@ -112,6 +114,8 @@ from .utils import (
|
|
|
112
114
|
SafetensorsRepoMetadata,
|
|
113
115
|
TensorInfo,
|
|
114
116
|
build_hf_headers,
|
|
117
|
+
chunk_iterable,
|
|
118
|
+
experimental,
|
|
115
119
|
filter_repo_objects,
|
|
116
120
|
fix_hf_endpoint_in_url,
|
|
117
121
|
get_session,
|
|
@@ -125,6 +129,7 @@ from .utils import (
|
|
|
125
129
|
from .utils import tqdm as hf_tqdm
|
|
126
130
|
from .utils._auth import _get_token_from_environment, _get_token_from_file, _get_token_from_google_colab
|
|
127
131
|
from .utils._deprecation import _deprecate_method
|
|
132
|
+
from .utils._runtime import is_xet_available
|
|
128
133
|
from .utils._typing import CallableT
|
|
129
134
|
from .utils.endpoint_helpers import _is_emission_within_threshold
|
|
130
135
|
|
|
@@ -163,6 +168,7 @@ ExpandModelProperty_T = Literal[
|
|
|
163
168
|
"trendingScore",
|
|
164
169
|
"usedStorage",
|
|
165
170
|
"widgetData",
|
|
171
|
+
"xetEnabled",
|
|
166
172
|
]
|
|
167
173
|
|
|
168
174
|
ExpandDatasetProperty_T = Literal[
|
|
@@ -185,6 +191,7 @@ ExpandDatasetProperty_T = Literal[
|
|
|
185
191
|
"tags",
|
|
186
192
|
"trendingScore",
|
|
187
193
|
"usedStorage",
|
|
194
|
+
"xetEnabled",
|
|
188
195
|
]
|
|
189
196
|
|
|
190
197
|
ExpandSpaceProperty_T = Literal[
|
|
@@ -206,6 +213,7 @@ ExpandSpaceProperty_T = Literal[
|
|
|
206
213
|
"tags",
|
|
207
214
|
"trendingScore",
|
|
208
215
|
"usedStorage",
|
|
216
|
+
"xetEnabled",
|
|
209
217
|
]
|
|
210
218
|
|
|
211
219
|
USERNAME_PLACEHOLDER = "hf_user"
|
|
@@ -816,6 +824,7 @@ class ModelInfo:
|
|
|
816
824
|
spaces: Optional[List[str]]
|
|
817
825
|
safetensors: Optional[SafeTensorsInfo]
|
|
818
826
|
security_repo_status: Optional[Dict]
|
|
827
|
+
xet_enabled: Optional[bool]
|
|
819
828
|
|
|
820
829
|
def __init__(self, **kwargs):
|
|
821
830
|
self.id = kwargs.pop("id")
|
|
@@ -890,6 +899,7 @@ class ModelInfo:
|
|
|
890
899
|
else None
|
|
891
900
|
)
|
|
892
901
|
self.security_repo_status = kwargs.pop("securityRepoStatus", None)
|
|
902
|
+
self.xet_enabled = kwargs.pop("xetEnabled", None)
|
|
893
903
|
# backwards compatibility
|
|
894
904
|
self.lastModified = self.last_modified
|
|
895
905
|
self.cardData = self.card_data
|
|
@@ -963,6 +973,7 @@ class DatasetInfo:
|
|
|
963
973
|
trending_score: Optional[int]
|
|
964
974
|
card_data: Optional[DatasetCardData]
|
|
965
975
|
siblings: Optional[List[RepoSibling]]
|
|
976
|
+
xet_enabled: Optional[bool]
|
|
966
977
|
|
|
967
978
|
def __init__(self, **kwargs):
|
|
968
979
|
self.id = kwargs.pop("id")
|
|
@@ -1008,7 +1019,7 @@ class DatasetInfo:
|
|
|
1008
1019
|
if siblings is not None
|
|
1009
1020
|
else None
|
|
1010
1021
|
)
|
|
1011
|
-
|
|
1022
|
+
self.xet_enabled = kwargs.pop("xetEnabled", None)
|
|
1012
1023
|
# backwards compatibility
|
|
1013
1024
|
self.lastModified = self.last_modified
|
|
1014
1025
|
self.cardData = self.card_data
|
|
@@ -1090,6 +1101,7 @@ class SpaceInfo:
|
|
|
1090
1101
|
runtime: Optional[SpaceRuntime]
|
|
1091
1102
|
models: Optional[List[str]]
|
|
1092
1103
|
datasets: Optional[List[str]]
|
|
1104
|
+
xet_enabled: Optional[bool]
|
|
1093
1105
|
|
|
1094
1106
|
def __init__(self, **kwargs):
|
|
1095
1107
|
self.id = kwargs.pop("id")
|
|
@@ -1138,7 +1150,7 @@ class SpaceInfo:
|
|
|
1138
1150
|
self.runtime = SpaceRuntime(runtime) if runtime else None
|
|
1139
1151
|
self.models = kwargs.pop("models", None)
|
|
1140
1152
|
self.datasets = kwargs.pop("datasets", None)
|
|
1141
|
-
|
|
1153
|
+
self.xet_enabled = kwargs.pop("xetEnabled", None)
|
|
1142
1154
|
# backwards compatibility
|
|
1143
1155
|
self.lastModified = self.last_modified
|
|
1144
1156
|
self.cardData = self.card_data
|
|
@@ -1521,6 +1533,67 @@ class PaperInfo:
|
|
|
1521
1533
|
self.__dict__.update(**kwargs)
|
|
1522
1534
|
|
|
1523
1535
|
|
|
1536
|
+
@dataclass
class LFSFileInfo:
    """
    Describes a single file stored as LFS in a repo on the Hub.

    Used in the context of listing and permanently deleting LFS files from a repo to free up space.
    See [`list_lfs_files`] and [`permanently_delete_lfs_files`] for more details.

    Git LFS files are tracked using SHA-256 object IDs, rather than file paths, to optimize performance.
    This approach is necessary because a single object can be referenced by multiple paths across different commits,
    making it impractical to search and resolve these connections. Check out [our documentation](https://huggingface.co/docs/hub/storage-limits#advanced-track-lfs-file-references)
    to learn how to find which filename(s) are associated with each SHA.

    Attributes:
        file_oid (`str`):
            SHA-256 object ID of the file. This is the identifier to pass when permanently deleting the file.
        filename (`str`):
            Possible filename for the LFS object. A single object may be referenced by several paths.
        oid (`str`):
            OID of the LFS object.
        pushed_at (`datetime`):
            Date the LFS object was pushed to the repo.
        ref (`str`, *optional*):
            Ref where the LFS object has been pushed (if any).
        size (`int`):
            Size of the LFS object.

    Example:
        ```py
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

        # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
        # e.g. select only LFS files in the "checkpoints" folder
        >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

        # Permanently delete LFS files
        >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
        ```
    """

    file_oid: str
    filename: str
    oid: str
    pushed_at: datetime
    ref: Optional[str]
    size: int

    def __init__(self, **kwargs) -> None:
        """Build the object from a raw payload given as keyword arguments.

        Every key except `ref` is required: a missing one raises `KeyError`.
        Keys are consumed in a fixed order, so the first missing key reported is deterministic.
        """
        take = kwargs.pop

        self.file_oid = take("fileOid")
        self.filename = take("filename")
        self.oid = take("oid")
        raw_pushed_at = take("pushedAt")
        self.pushed_at = parse_datetime(raw_pushed_at)
        self.ref = take("ref", None)  # the only optional key
        self.size = take("size")

        # forward compatibility: keep any key we did not consume as a plain attribute
        vars(self).update(kwargs)
|
|
1595
|
+
|
|
1596
|
+
|
|
1524
1597
|
def future_compatible(fn: CallableT) -> CallableT:
|
|
1525
1598
|
"""Wrap a method of `HfApi` to handle `run_as_future=True`.
|
|
1526
1599
|
|
|
@@ -1818,7 +1891,7 @@ class HfApi:
|
|
|
1818
1891
|
expand (`List[ExpandModelProperty_T]`, *optional*):
|
|
1819
1892
|
List properties to return in the response. When used, only the properties in the list will be returned.
|
|
1820
1893
|
This parameter cannot be used if `full`, `cardData` or `fetch_config` are passed.
|
|
1821
|
-
Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"usedStorage"` and `"
|
|
1894
|
+
Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
|
|
1822
1895
|
full (`bool`, *optional*):
|
|
1823
1896
|
Whether to fetch all model data, including the `last_modified`,
|
|
1824
1897
|
the `sha`, the files and the `tags`. This is set to `True` by
|
|
@@ -2038,7 +2111,7 @@ class HfApi:
|
|
|
2038
2111
|
expand (`List[ExpandDatasetProperty_T]`, *optional*):
|
|
2039
2112
|
List properties to return in the response. When used, only the properties in the list will be returned.
|
|
2040
2113
|
This parameter cannot be used if `full` is passed.
|
|
2041
|
-
Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"` and `"
|
|
2114
|
+
Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
|
|
2042
2115
|
full (`bool`, *optional*):
|
|
2043
2116
|
Whether to fetch all dataset data, including the `last_modified`,
|
|
2044
2117
|
the `card_data` and the files. Can contain useful information such as the
|
|
@@ -2216,7 +2289,7 @@ class HfApi:
|
|
|
2216
2289
|
expand (`List[ExpandSpaceProperty_T]`, *optional*):
|
|
2217
2290
|
List properties to return in the response. When used, only the properties in the list will be returned.
|
|
2218
2291
|
This parameter cannot be used if `full` is passed.
|
|
2219
|
-
Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"` and `"
|
|
2292
|
+
Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
|
|
2220
2293
|
full (`bool`, *optional*):
|
|
2221
2294
|
Whether to fetch all Spaces data, including the `last_modified`, `siblings`
|
|
2222
2295
|
and `card_data` fields.
|
|
@@ -2477,7 +2550,7 @@ class HfApi:
|
|
|
2477
2550
|
expand (`List[ExpandModelProperty_T]`, *optional*):
|
|
2478
2551
|
List properties to return in the response. When used, only the properties in the list will be returned.
|
|
2479
2552
|
This parameter cannot be used if `securityStatus` or `files_metadata` are passed.
|
|
2480
|
-
Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"usedStorage"` and `"
|
|
2553
|
+
Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
|
|
2481
2554
|
token (Union[bool, str, None], optional):
|
|
2482
2555
|
A valid user access token (string). Defaults to the locally saved
|
|
2483
2556
|
token, which is the recommended method for authentication (see
|
|
@@ -2551,7 +2624,7 @@ class HfApi:
|
|
|
2551
2624
|
expand (`List[ExpandDatasetProperty_T]`, *optional*):
|
|
2552
2625
|
List properties to return in the response. When used, only the properties in the list will be returned.
|
|
2553
2626
|
This parameter cannot be used if `files_metadata` is passed.
|
|
2554
|
-
Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`,`"usedStorage"` and `"
|
|
2627
|
+
Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
|
|
2555
2628
|
token (Union[bool, str, None], optional):
|
|
2556
2629
|
A valid user access token (string). Defaults to the locally saved
|
|
2557
2630
|
token, which is the recommended method for authentication (see
|
|
@@ -2624,7 +2697,7 @@ class HfApi:
|
|
|
2624
2697
|
expand (`List[ExpandSpaceProperty_T]`, *optional*):
|
|
2625
2698
|
List properties to return in the response. When used, only the properties in the list will be returned.
|
|
2626
2699
|
This parameter cannot be used if `full` is passed.
|
|
2627
|
-
Possible values are `"author"`, `"cardData"`, `"createdAt"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"` and `"
|
|
2700
|
+
Possible values are `"author"`, `"cardData"`, `"createdAt"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, `"resourceGroup"` and `"xetEnabled"`.
|
|
2628
2701
|
token (Union[bool, str, None], optional):
|
|
2629
2702
|
A valid user access token (string). Defaults to the locally saved
|
|
2630
2703
|
token, which is the recommended method for authentication (see
|
|
@@ -3379,6 +3452,131 @@ class HfApi:
|
|
|
3379
3452
|
response = get_session().post(url=url, headers=headers, json={"message": commit_message})
|
|
3380
3453
|
hf_raise_for_status(response)
|
|
3381
3454
|
|
|
3455
|
+
@validate_hf_hub_args
def list_lfs_files(
    self,
    repo_id: str,
    *,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> Iterable[LFSFileInfo]:
    """
    List all LFS files in a repo on the Hub.

    This is primarily useful to count how much storage a repo is using and to eventually clean up large files
    with [`permanently_delete_lfs_files`]. Note that deleting is a permanent action that affects all commits
    referencing the deleted files and that cannot be undone.

    Args:
        repo_id (`str`):
            The repository for which you are listing LFS files.
        repo_type (`str`, *optional*):
            Type of repository. Set to `"dataset"` or `"space"` if listing from a dataset or space, `None` or
            `"model"` if listing from a model. Default is `None`.
        token (Union[bool, str, None], optional):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Returns:
        `Iterable[LFSFileInfo]`: An iterator of [`LFSFileInfo`] objects.

    Example:
        ```py
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

        # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
        # e.g. select only LFS files in the "checkpoints" folder
        >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

        # Permanently delete LFS files
        >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
        ```
    """
    # Model is the repo type used when the caller does not specify one.
    repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    lfs_files_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/lfs-files"
    request_headers = self._build_hf_headers(token=token)

    # Each raw item returned by `paginate` is wrapped into an `LFSFileInfo` as it is consumed.
    yield from (LFSFileInfo(**raw_item) for raw_item in paginate(lfs_files_url, params={}, headers=request_headers))
|
|
3508
|
+
|
|
3509
|
+
@validate_hf_hub_args
def permanently_delete_lfs_files(
    self,
    repo_id: str,
    lfs_files: Iterable[LFSFileInfo],
    *,
    rewrite_history: bool = True,
    repo_type: Optional[str] = None,
    token: Union[bool, str, None] = None,
) -> None:
    """
    Permanently delete LFS files from a repo on the Hub.

    <Tip warning={true}>

    This is a permanent action that will affect all commits referencing the deleted files and might corrupt your
    repository. This is a non-revertible operation. Use it only if you know what you are doing.

    </Tip>

    Args:
        repo_id (`str`):
            The repository from which you are deleting LFS files.
        lfs_files (`Iterable[LFSFileInfo]`):
            An iterable of [`LFSFileInfo`] items to permanently delete from the repo. Use [`list_lfs_files`] to list
            all LFS files from a repo.
        rewrite_history (`bool`, *optional*, default to `True`):
            Whether to rewrite repository history to remove file pointers referencing the deleted LFS files (recommended).
        repo_type (`str`, *optional*):
            Type of repository. Set to `"dataset"` or `"space"` if deleting from a dataset or space, `None` or
            `"model"` if deleting from a model. Default is `None`.
        token (Union[bool, str, None], optional):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
            To disable authentication, pass `False`.

    Example:
        ```py
        >>> from huggingface_hub import HfApi
        >>> api = HfApi()
        >>> lfs_files = api.list_lfs_files("username/my-cool-repo")

        # Filter files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`.
        # e.g. select only LFS files in the "checkpoints" folder
        >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/"))

        # Permanently delete LFS files
        >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete)
        ```
    """
    # Model is the repo type used when the caller does not specify one.
    repo_type = constants.REPO_TYPE_MODEL if repo_type is None else repo_type
    batch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/lfs-files/batch"
    request_headers = self._build_hf_headers(token=token)

    # Delete LFS items by batches of 1000: one POST request per batch.
    for chunk in chunk_iterable(lfs_files, 1000):
        object_ids = [lfs_file.file_oid for lfs_file in chunk]
        if not object_ids:
            # An empty batch stops the whole operation, not just this iteration.
            return
        deletions = {"sha": object_ids, "rewriteHistory": rewrite_history}
        response = get_session().post(batch_url, headers=request_headers, json={"deletions": deletions})
        hf_raise_for_status(response)
|
|
3579
|
+
|
|
3382
3580
|
@validate_hf_hub_args
|
|
3383
3581
|
def create_repo(
|
|
3384
3582
|
self,
|
|
@@ -3642,6 +3840,7 @@ class HfApi:
|
|
|
3642
3840
|
private: Optional[bool] = None,
|
|
3643
3841
|
token: Union[str, bool, None] = None,
|
|
3644
3842
|
repo_type: Optional[str] = None,
|
|
3843
|
+
xet_enabled: Optional[bool] = None,
|
|
3645
3844
|
) -> None:
|
|
3646
3845
|
"""
|
|
3647
3846
|
Update the settings of a repository, including gated access and visibility.
|
|
@@ -3667,7 +3866,8 @@ class HfApi:
|
|
|
3667
3866
|
repo_type (`str`, *optional*):
|
|
3668
3867
|
The type of the repository to update settings from (`"model"`, `"dataset"` or `"space"`).
|
|
3669
3868
|
Defaults to `"model"`.
|
|
3670
|
-
|
|
3869
|
+
xet_enabled (`bool`, *optional*):
|
|
3870
|
+
Whether the repository should be enabled for Xet Storage.
|
|
3671
3871
|
Raises:
|
|
3672
3872
|
[`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
|
3673
3873
|
If gated is not one of "auto", "manual", or False.
|
|
@@ -3685,13 +3885,6 @@ class HfApi:
|
|
|
3685
3885
|
if repo_type is None:
|
|
3686
3886
|
repo_type = constants.REPO_TYPE_MODEL # default repo type
|
|
3687
3887
|
|
|
3688
|
-
# Check if both gated and private are None
|
|
3689
|
-
if gated is None and private is None:
|
|
3690
|
-
raise ValueError("At least one of 'gated' or 'private' must be provided.")
|
|
3691
|
-
|
|
3692
|
-
# Build headers
|
|
3693
|
-
headers = self._build_hf_headers(token=token)
|
|
3694
|
-
|
|
3695
3888
|
# Prepare the JSON payload for the PUT request
|
|
3696
3889
|
payload: Dict = {}
|
|
3697
3890
|
|
|
@@ -3703,6 +3896,15 @@ class HfApi:
|
|
|
3703
3896
|
if private is not None:
|
|
3704
3897
|
payload["private"] = private
|
|
3705
3898
|
|
|
3899
|
+
if xet_enabled is not None:
|
|
3900
|
+
payload["xetEnabled"] = xet_enabled
|
|
3901
|
+
|
|
3902
|
+
if len(payload) == 0:
|
|
3903
|
+
raise ValueError("At least one setting must be updated.")
|
|
3904
|
+
|
|
3905
|
+
# Build headers
|
|
3906
|
+
headers = self._build_hf_headers(token=token)
|
|
3907
|
+
|
|
3706
3908
|
r = get_session().put(
|
|
3707
3909
|
url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/settings",
|
|
3708
3910
|
headers=headers,
|
|
@@ -4240,20 +4442,45 @@ class HfApi:
|
|
|
4240
4442
|
f"Skipped upload for {len(new_lfs_additions) - len(new_lfs_additions_to_upload)} LFS file(s) "
|
|
4241
4443
|
"(ignored by gitignore file)."
|
|
4242
4444
|
)
|
|
4243
|
-
|
|
4244
|
-
|
|
4245
|
-
|
|
4246
|
-
|
|
4247
|
-
|
|
4248
|
-
|
|
4249
|
-
|
|
4250
|
-
endpoint=self.endpoint,
|
|
4251
|
-
num_threads=num_threads,
|
|
4445
|
+
# Prepare upload parameters
|
|
4446
|
+
upload_kwargs = {
|
|
4447
|
+
"additions": new_lfs_additions_to_upload,
|
|
4448
|
+
"repo_type": repo_type,
|
|
4449
|
+
"repo_id": repo_id,
|
|
4450
|
+
"headers": headers,
|
|
4451
|
+
"endpoint": self.endpoint,
|
|
4252
4452
|
# If `create_pr`, we don't want to check user permission on the revision as users with read permission
|
|
4253
4453
|
# should still be able to create PRs even if they don't have write permission on the target branch of the
|
|
4254
4454
|
# PR (i.e. `revision`).
|
|
4255
|
-
revision
|
|
4455
|
+
"revision": revision if not create_pr else None,
|
|
4456
|
+
}
|
|
4457
|
+
# Upload files using Xet protocol if all of the following are true:
|
|
4458
|
+
# - xet is enabled for the repo,
|
|
4459
|
+
# - the files are provided as str or paths objects,
|
|
4460
|
+
# - the library is installed.
|
|
4461
|
+
# Otherwise, default back to LFS.
|
|
4462
|
+
xet_enabled = self.repo_info(
|
|
4463
|
+
repo_id=repo_id,
|
|
4464
|
+
repo_type=repo_type,
|
|
4465
|
+
revision=unquote(revision) if revision is not None else revision,
|
|
4466
|
+
expand="xetEnabled",
|
|
4467
|
+
token=token,
|
|
4468
|
+
).xet_enabled
|
|
4469
|
+
has_binary_data = any(
|
|
4470
|
+
isinstance(addition.path_or_fileobj, (bytes, io.BufferedIOBase))
|
|
4471
|
+
for addition in new_lfs_additions_to_upload
|
|
4256
4472
|
)
|
|
4473
|
+
if xet_enabled and not has_binary_data and is_xet_available():
|
|
4474
|
+
logger.info("Uploading files using Xet Storage..")
|
|
4475
|
+
_upload_xet_files(**upload_kwargs, create_pr=create_pr) # type: ignore [arg-type]
|
|
4476
|
+
else:
|
|
4477
|
+
if xet_enabled and is_xet_available():
|
|
4478
|
+
if has_binary_data:
|
|
4479
|
+
logger.warning(
|
|
4480
|
+
"Uploading files as bytes or binary IO objects is not supported by Xet Storage. "
|
|
4481
|
+
"Falling back to HTTP upload."
|
|
4482
|
+
)
|
|
4483
|
+
_upload_lfs_files(**upload_kwargs, num_threads=num_threads) # type: ignore [arg-type]
|
|
4257
4484
|
for addition in new_lfs_additions_to_upload:
|
|
4258
4485
|
addition._is_uploaded = True
|
|
4259
4486
|
if free_memory:
|
|
@@ -7485,6 +7712,94 @@ class HfApi:
|
|
|
7485
7712
|
|
|
7486
7713
|
return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
|
|
7487
7714
|
|
|
7715
|
+
@experimental
@validate_hf_hub_args
def create_inference_endpoint_from_catalog(
    self,
    repo_id: str,
    *,
    name: Optional[str] = None,
    token: Union[bool, str, None] = None,
    namespace: Optional[str] = None,
) -> InferenceEndpoint:
    """Create a new Inference Endpoint from a model in the Hugging Face Inference Catalog.

    The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference
    and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list
    of available models in the catalog.

    Args:
        repo_id (`str`):
            The ID of the model in the catalog to deploy as an Inference Endpoint.
        name (`str`, *optional*):
            The unique name for the new Inference Endpoint. If not provided, a random name will be generated.
        token (Union[bool, str, None], optional):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
        namespace (`str`, *optional*):
            The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.

    Returns:
        [`InferenceEndpoint`]: information about the new Inference Endpoint.

    <Tip warning={true}>

    `create_inference_endpoint_from_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
    if you have any suggestions or requests.

    </Tip>
    """
    token = token or self.token or get_token()

    # Resolve the namespace once so the deploy request and the returned object agree on it.
    namespace = namespace or self._get_namespace(token=token)
    payload: Dict = {
        "namespace": namespace,
        "repoId": repo_id,
    }
    # Only send a name when the caller chose one.
    if name is not None:
        payload["endpointName"] = name

    response = get_session().post(
        f"{constants.INFERENCE_CATALOG_ENDPOINT}/deploy",
        headers=self._build_hf_headers(token=token),
        json=payload,
    )
    hf_raise_for_status(response)
    data = response.json()["endpoint"]

    # Pass the namespace the endpoint was deployed to. The previous code passed `data["name"]`,
    # which looks like the endpoint's own name rather than its namespace.
    # NOTE(review): confirm against the catalog API response shape.
    return InferenceEndpoint.from_raw(data, namespace=namespace, token=token)
|
|
7769
|
+
|
|
7770
|
+
@experimental
@validate_hf_hub_args
def list_inference_catalog(self, *, token: Union[bool, str, None] = None) -> List[str]:
    """List models available in the Hugging Face Inference Catalog.

    The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference
    and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list
    of available models in the catalog.

    Use [`create_inference_endpoint_from_catalog`] to deploy a model from the catalog.

    Args:
        token (Union[bool, str, None], optional):
            A valid user access token (string). Defaults to the locally saved
            token, which is the recommended method for authentication (see
            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).

    Returns:
        List[`str`]: A list of model IDs available in the catalog.

    <Tip warning={true}>

    `list_inference_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
    if you have any suggestions or requests.

    </Tip>
    """
    session = get_session()
    response = session.get(
        f"{constants.INFERENCE_CATALOG_ENDPOINT}/repo-list",
        headers=self._build_hf_headers(token=token),
    )
    hf_raise_for_status(response)

    # The model IDs are stored under the "models" key of the JSON body.
    catalog = response.json()
    return catalog["models"]
|
|
7802
|
+
|
|
7488
7803
|
def get_inference_endpoint(
|
|
7489
7804
|
self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None
|
|
7490
7805
|
) -> InferenceEndpoint:
|
|
@@ -9545,7 +9860,6 @@ create_repo = api.create_repo
|
|
|
9545
9860
|
delete_repo = api.delete_repo
|
|
9546
9861
|
update_repo_visibility = api.update_repo_visibility
|
|
9547
9862
|
update_repo_settings = api.update_repo_settings
|
|
9548
|
-
super_squash_history = api.super_squash_history
|
|
9549
9863
|
move_repo = api.move_repo
|
|
9550
9864
|
upload_file = api.upload_file
|
|
9551
9865
|
upload_folder = api.upload_folder
|
|
@@ -9560,6 +9874,11 @@ create_tag = api.create_tag
|
|
|
9560
9874
|
delete_tag = api.delete_tag
|
|
9561
9875
|
get_full_repo_name = api.get_full_repo_name
|
|
9562
9876
|
|
|
9877
|
+
# Danger-zone API
|
|
9878
|
+
super_squash_history = api.super_squash_history
|
|
9879
|
+
list_lfs_files = api.list_lfs_files
|
|
9880
|
+
permanently_delete_lfs_files = api.permanently_delete_lfs_files
|
|
9881
|
+
|
|
9563
9882
|
# Safetensors helpers
|
|
9564
9883
|
get_safetensors_metadata = api.get_safetensors_metadata
|
|
9565
9884
|
parse_safetensors_file_metadata = api.parse_safetensors_file_metadata
|
|
@@ -9607,6 +9926,8 @@ delete_inference_endpoint = api.delete_inference_endpoint
|
|
|
9607
9926
|
pause_inference_endpoint = api.pause_inference_endpoint
|
|
9608
9927
|
resume_inference_endpoint = api.resume_inference_endpoint
|
|
9609
9928
|
scale_to_zero_inference_endpoint = api.scale_to_zero_inference_endpoint
|
|
9929
|
+
create_inference_endpoint_from_catalog = api.create_inference_endpoint_from_catalog
|
|
9930
|
+
list_inference_catalog = api.list_inference_catalog
|
|
9610
9931
|
|
|
9611
9932
|
# Collections API
|
|
9612
9933
|
get_collection = api.get_collection
|