huggingface-hub 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff reflects the changes between the publicly released 0.18.0 and 0.19.0 versions of the package as they appear in their public registry, and is provided for informational purposes only.
- huggingface_hub/__init__.py +31 -5
- huggingface_hub/_inference_endpoints.py +348 -0
- huggingface_hub/_login.py +9 -7
- huggingface_hub/_multi_commits.py +1 -1
- huggingface_hub/_snapshot_download.py +6 -7
- huggingface_hub/_space_api.py +7 -4
- huggingface_hub/_tensorboard_logger.py +1 -0
- huggingface_hub/_webhooks_payload.py +7 -7
- huggingface_hub/commands/lfs.py +3 -6
- huggingface_hub/commands/user.py +1 -4
- huggingface_hub/constants.py +27 -0
- huggingface_hub/file_download.py +142 -134
- huggingface_hub/hf_api.py +1036 -501
- huggingface_hub/hf_file_system.py +57 -12
- huggingface_hub/hub_mixin.py +3 -5
- huggingface_hub/inference/_client.py +43 -8
- huggingface_hub/inference/_common.py +8 -16
- huggingface_hub/inference/_generated/_async_client.py +41 -8
- huggingface_hub/inference/_text_generation.py +43 -0
- huggingface_hub/inference_api.py +1 -1
- huggingface_hub/lfs.py +32 -14
- huggingface_hub/repocard_data.py +7 -0
- huggingface_hub/repository.py +19 -3
- huggingface_hub/templates/modelcard_template.md +1 -1
- huggingface_hub/utils/__init__.py +1 -1
- huggingface_hub/utils/_cache_assets.py +3 -3
- huggingface_hub/utils/_cache_manager.py +6 -7
- huggingface_hub/utils/_datetime.py +3 -1
- huggingface_hub/utils/_errors.py +10 -0
- huggingface_hub/utils/_hf_folder.py +4 -2
- huggingface_hub/utils/_http.py +10 -1
- huggingface_hub/utils/_runtime.py +4 -2
- huggingface_hub/utils/endpoint_helpers.py +27 -175
- huggingface_hub/utils/insecure_hashlib.py +34 -0
- huggingface_hub/utils/logging.py +4 -6
- huggingface_hub/utils/sha.py +2 -1
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/METADATA +16 -15
- huggingface_hub-0.19.0.dist-info/RECORD +74 -0
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/WHEEL +1 -1
- huggingface_hub-0.18.0.dist-info/RECORD +0 -72
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/top_level.txt +0 -0
huggingface_hub/__init__.py
CHANGED
@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING
 
 
-__version__ = "0.18.0"
+__version__ = "0.19.0"
 
 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
@@ -55,6 +55,13 @@ _SUBMOD_ATTRS = {
     "_commit_scheduler": [
         "CommitScheduler",
     ],
+    "_inference_endpoints": [
+        "InferenceEndpoint",
+        "InferenceEndpointError",
+        "InferenceEndpointStatus",
+        "InferenceEndpointTimeoutError",
+        "InferenceEndpointType",
+    ],
     "_login": [
         "interpreter_login",
         "login",
@@ -136,12 +143,10 @@ _SUBMOD_ATTRS = {
         "CommitOperationAdd",
         "CommitOperationCopy",
         "CommitOperationDelete",
-        "DatasetSearchArguments",
         "GitCommitInfo",
         "GitRefInfo",
         "GitRefs",
         "HfApi",
-        "ModelSearchArguments",
         "RepoUrl",
         "User",
         "UserLikes",
@@ -155,6 +160,7 @@ _SUBMOD_ATTRS = {
         "create_commit",
         "create_commits_on_pr",
         "create_discussion",
+        "create_inference_endpoint",
         "create_pull_request",
         "create_repo",
         "create_tag",
@@ -164,6 +170,7 @@ _SUBMOD_ATTRS = {
         "delete_collection_item",
         "delete_file",
         "delete_folder",
+        "delete_inference_endpoint",
         "delete_repo",
         "delete_space_secret",
         "delete_space_storage",
@@ -176,6 +183,7 @@ _SUBMOD_ATTRS = {
         "get_dataset_tags",
         "get_discussion_details",
         "get_full_repo_name",
+        "get_inference_endpoint",
         "get_model_tags",
         "get_repo_discussions",
         "get_space_runtime",
@@ -184,6 +192,7 @@ _SUBMOD_ATTRS = {
         "like",
         "list_datasets",
         "list_files_info",
+        "list_inference_endpoints",
         "list_liked_repos",
         "list_metrics",
         "list_models",
@@ -195,6 +204,7 @@ _SUBMOD_ATTRS = {
         "merge_pull_request",
         "model_info",
         "move_repo",
+        "pause_inference_endpoint",
         "pause_space",
         "preupload_lfs_files",
         "rename_discussion",
@@ -204,13 +214,16 @@ _SUBMOD_ATTRS = {
         "request_space_hardware",
         "request_space_storage",
         "restart_space",
+        "resume_inference_endpoint",
         "run_as_future",
+        "scale_to_zero_inference_endpoint",
         "set_space_sleep_time",
         "space_info",
         "super_squash_history",
         "unlike",
         "update_collection_item",
         "update_collection_metadata",
+        "update_inference_endpoint",
         "update_repo_visibility",
         "upload_file",
         "upload_folder",
@@ -380,6 +393,13 @@ __getattr__, __dir__, __all__ = _attach(__name__, submodules=[], submod_attrs=_S
 # ```
 if TYPE_CHECKING:  # pragma: no cover
     from ._commit_scheduler import CommitScheduler  # noqa: F401
+    from ._inference_endpoints import (
+        InferenceEndpoint,  # noqa: F401
+        InferenceEndpointError,  # noqa: F401
+        InferenceEndpointStatus,  # noqa: F401
+        InferenceEndpointTimeoutError,  # noqa: F401
+        InferenceEndpointType,  # noqa: F401
+    )
     from ._login import (
         interpreter_login,  # noqa: F401
         login,  # noqa: F401
@@ -457,12 +477,10 @@ if TYPE_CHECKING:  # pragma: no cover
         CommitOperationAdd,  # noqa: F401
         CommitOperationCopy,  # noqa: F401
         CommitOperationDelete,  # noqa: F401
-        DatasetSearchArguments,  # noqa: F401
         GitCommitInfo,  # noqa: F401
         GitRefInfo,  # noqa: F401
         GitRefs,  # noqa: F401
         HfApi,  # noqa: F401
-        ModelSearchArguments,  # noqa: F401
         RepoUrl,  # noqa: F401
         User,  # noqa: F401
         UserLikes,  # noqa: F401
@@ -476,6 +494,7 @@ if TYPE_CHECKING:  # pragma: no cover
         create_commit,  # noqa: F401
         create_commits_on_pr,  # noqa: F401
         create_discussion,  # noqa: F401
+        create_inference_endpoint,  # noqa: F401
         create_pull_request,  # noqa: F401
         create_repo,  # noqa: F401
         create_tag,  # noqa: F401
@@ -485,6 +504,7 @@ if TYPE_CHECKING:  # pragma: no cover
         delete_collection_item,  # noqa: F401
         delete_file,  # noqa: F401
         delete_folder,  # noqa: F401
+        delete_inference_endpoint,  # noqa: F401
         delete_repo,  # noqa: F401
         delete_space_secret,  # noqa: F401
         delete_space_storage,  # noqa: F401
@@ -497,6 +517,7 @@ if TYPE_CHECKING:  # pragma: no cover
         get_dataset_tags,  # noqa: F401
         get_discussion_details,  # noqa: F401
         get_full_repo_name,  # noqa: F401
+        get_inference_endpoint,  # noqa: F401
         get_model_tags,  # noqa: F401
         get_repo_discussions,  # noqa: F401
         get_space_runtime,  # noqa: F401
@@ -505,6 +526,7 @@ if TYPE_CHECKING:  # pragma: no cover
         like,  # noqa: F401
         list_datasets,  # noqa: F401
         list_files_info,  # noqa: F401
+        list_inference_endpoints,  # noqa: F401
         list_liked_repos,  # noqa: F401
         list_metrics,  # noqa: F401
         list_models,  # noqa: F401
@@ -516,6 +538,7 @@ if TYPE_CHECKING:  # pragma: no cover
         merge_pull_request,  # noqa: F401
         model_info,  # noqa: F401
         move_repo,  # noqa: F401
+        pause_inference_endpoint,  # noqa: F401
         pause_space,  # noqa: F401
         preupload_lfs_files,  # noqa: F401
         rename_discussion,  # noqa: F401
@@ -525,13 +548,16 @@ if TYPE_CHECKING:  # pragma: no cover
         request_space_hardware,  # noqa: F401
         request_space_storage,  # noqa: F401
         restart_space,  # noqa: F401
+        resume_inference_endpoint,  # noqa: F401
         run_as_future,  # noqa: F401
+        scale_to_zero_inference_endpoint,  # noqa: F401
         set_space_sleep_time,  # noqa: F401
         space_info,  # noqa: F401
         super_squash_history,  # noqa: F401
         unlike,  # noqa: F401
         update_collection_item,  # noqa: F401
         update_collection_metadata,  # noqa: F401
+        update_inference_endpoint,  # noqa: F401
         update_repo_visibility,  # noqa: F401
         upload_file,  # noqa: F401
         upload_folder,  # noqa: F401
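Taken together, the `__init__.py` changes expose the new Inference Endpoints API at the package root (both in `_SUBMOD_ATTRS` for lazy loading and in the `TYPE_CHECKING` block) and drop the `DatasetSearchArguments` / `ModelSearchArguments` exports. A minimal sketch of the new top-level imports, assuming an endpoint named "my-endpoint" exists in your namespace (the name is a placeholder):

    # Sketch only: "my-endpoint" is a placeholder, not a real endpoint.
    from huggingface_hub import (
        InferenceEndpointStatus,
        get_inference_endpoint,
        list_inference_endpoints,
    )

    endpoints = list_inference_endpoints()            # all endpoints in your namespace
    endpoint = get_inference_endpoint("my-endpoint")  # fetch a single one by name
    print(endpoint.status == InferenceEndpointStatus.RUNNING)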

huggingface_hub/_inference_endpoints.py
ADDED
@@ -0,0 +1,348 @@
+import time
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import TYPE_CHECKING, Dict, Optional
+
+from .inference._client import InferenceClient
+from .inference._generated._async_client import AsyncInferenceClient
+from .utils import logging, parse_datetime
+
+
+if TYPE_CHECKING:
+    from .hf_api import HfApi
+
+
+logger = logging.get_logger(__name__)
+
+
+class InferenceEndpointError(Exception):
+    """Generic exception when dealing with Inference Endpoints."""
+
+
+class InferenceEndpointTimeoutError(InferenceEndpointError, TimeoutError):
+    """Exception for timeouts while waiting for Inference Endpoint."""
+
+
+class InferenceEndpointStatus(str, Enum):
+    PENDING = "pending"
+    INITIALIZING = "initializing"
+    UPDATING = "updating"
+    UPDATE_FAILED = "updateFailed"
+    RUNNING = "running"
+    PAUSED = "paused"
+    FAILED = "failed"
+    SCALED_TO_ZERO = "scaledToZero"
+
+
+class InferenceEndpointType(str, Enum):
+    PUBlIC = "public"
+    PROTECTED = "protected"
+    PRIVATE = "private"
+
+
+@dataclass
+class InferenceEndpoint:
+    """
+    Contains information about a deployed Inference Endpoint.
+
+    Args:
+        name (`str`):
+            The unique name of the Inference Endpoint.
+        namespace (`str`):
+            The namespace where the Inference Endpoint is located.
+        repository (`str`):
+            The name of the model repository deployed on this Inference Endpoint.
+        status ([`InferenceEndpointStatus`]):
+            The current status of the Inference Endpoint.
+        url (`str`, *optional*):
+            The URL of the Inference Endpoint, if available. Only a deployed Inference Endpoint will have a URL.
+        framework (`str`):
+            The machine learning framework used for the model.
+        revision (`str`):
+            The specific model revision deployed on the Inference Endpoint.
+        task (`str`):
+            The task associated with the deployed model.
+        created_at (`datetime.datetime`):
+            The timestamp when the Inference Endpoint was created.
+        updated_at (`datetime.datetime`):
+            The timestamp of the last update of the Inference Endpoint.
+        type ([`InferenceEndpointType`]):
+            The type of the Inference Endpoint (public, protected, private).
+        raw (`Dict`):
+            The raw dictionary data returned from the API.
+        token (`str`, *optional*):
+            Authentication token for the Inference Endpoint, if set when requesting the API.
+
+    Example:
+    ```python
+    >>> from huggingface_hub import get_inference_endpoint
+    >>> endpoint = get_inference_endpoint("my-text-to-image")
+    >>> endpoint
+    InferenceEndpoint(name='my-text-to-image', ...)
+
+    # Get status
+    >>> endpoint.status
+    'running'
+    >>> endpoint.url
+    'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud'
+
+    # Run inference
+    >>> endpoint.client.text_to_image(...)
+
+    # Pause endpoint to save $$$
+    >>> endpoint.pause()
+
+    # ...
+    # Resume and wait for deployment
+    >>> endpoint.resume()
+    >>> endpoint.wait()
+    >>> endpoint.client.text_to_image(...)
+    ```
+    """
+
+    # Field in __repr__
+    name: str = field(init=False)
+    namespace: str
+    repository: str = field(init=False)
+    status: InferenceEndpointStatus = field(init=False)
+    url: Optional[str] = field(init=False)
+
+    # Other fields
+    framework: str = field(repr=False, init=False)
+    revision: str = field(repr=False, init=False)
+    task: str = field(repr=False, init=False)
+    created_at: datetime = field(repr=False, init=False)
+    updated_at: datetime = field(repr=False, init=False)
+    type: InferenceEndpointType = field(repr=False, init=False)
+
+    # Raw dict from the API
+    raw: Dict = field(repr=False)
+
+    # Internal fields
+    _token: Optional[str] = field(repr=False, compare=False)
+    _api: "HfApi" = field(repr=False, compare=False)
+
+    @classmethod
+    def from_raw(
+        cls, raw: Dict, namespace: str, token: Optional[str] = None, api: Optional["HfApi"] = None
+    ) -> "InferenceEndpoint":
+        """Initialize object from raw dictionary."""
+        if api is None:
+            from .hf_api import HfApi
+
+            api = HfApi()
+        if token is None:
+            token = api.token
+
+        # All other fields are populated in __post_init__
+        return cls(raw=raw, namespace=namespace, _token=token, _api=api)
+
+    def __post_init__(self) -> None:
+        """Populate fields from raw dictionary."""
+        self._populate_from_raw()
+
+    @property
+    def client(self) -> InferenceClient:
+        """Returns a client to make predictions on this Inference Endpoint.
+
+        Raises:
+            [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed.
+        """
+        if self.url is None:
+            raise InferenceEndpointError(
+                "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
+                "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
+            )
+        return InferenceClient(model=self.url, token=self._token)
+
+    @property
+    def async_client(self) -> AsyncInferenceClient:
+        """Returns a client to make predictions on this Inference Endpoint.
+
+        Raises:
+            [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed.
+        """
+        if self.url is None:
+            raise InferenceEndpointError(
+                "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
+                "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
+            )
+        return AsyncInferenceClient(model=self.url, token=self._token)
+
+    def wait(self, timeout: Optional[int] = None, refresh_every: int = 5) -> None:
+        """Wait for the Inference Endpoint to be deployed.
+
+        Information from the server will be fetched every 1s. If the Inference Endpoint is not deployed after `timeout`
+        seconds, a [`InferenceEndpointTimeoutError`] will be raised. The [`InferenceEndpoint`] will be mutated in place with the latest
+        data.
+
+        Args:
+            timeout (`int`, *optional*):
+                The maximum time to wait for the Inference Endpoint to be deployed, in seconds. If `None`, will wait
+                indefinitely.
+            refresh_every (`int`, *optional*):
+                The time to wait between each fetch of the Inference Endpoint status, in seconds. Defaults to 5s.
+        """
+        if self.url is not None:  # Means the endpoint is deployed
+            logger.info("Inference Endpoint is ready to be used.")
+            return
+
+        if timeout is not None and timeout < 0:
+            raise ValueError("`timeout` cannot be negative.")
+        if refresh_every <= 0:
+            raise ValueError("`refresh_every` must be positive.")
+
+        start = time.time()
+        while True:
+            self.fetch()
+            if self.url is not None:  # Means the endpoint is deployed
+                logger.info("Inference Endpoint is ready to be used.")
+                return
+            if timeout is not None:
+                if time.time() - start > timeout:
+                    raise InferenceEndpointTimeoutError("Timeout while waiting for Inference Endpoint to be deployed.")
+            logger.info(f"Inference Endpoint is not deployed yet ({self.status}). Waiting {refresh_every}s...")
+            time.sleep(refresh_every)
+
+    def fetch(self) -> "InferenceEndpoint":
+        """Fetch latest information about the Inference Endpoint."""
+        obj = self._api.get_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+
+    def update(
+        self,
+        *,
+        # Compute update
+        accelerator: Optional[str] = None,
+        instance_size: Optional[str] = None,
+        instance_type: Optional[str] = None,
+        min_replica: Optional[int] = None,
+        max_replica: Optional[int] = None,
+        # Model update
+        repository: Optional[str] = None,
+        framework: Optional[str] = None,
+        revision: Optional[str] = None,
+        task: Optional[str] = None,
+    ) -> "InferenceEndpoint":
+        """Update the Inference Endpoint.
+
+        This method allows the update of either the compute configuration, the deployed model, or both. All arguments are
+        optional but at least one must be provided.
+
+        This is an alias for [`HfApi.update_inference_endpoint`]. The current object is mutated in place with the
+        latest data from the server.
+
+        Args:
+            accelerator (`str`, *optional*):
+                The hardware accelerator to be used for inference (e.g. `"cpu"`).
+            instance_size (`str`, *optional*):
+                The size or type of the instance to be used for hosting the model (e.g. `"large"`).
+            instance_type (`str`, *optional*):
+                The cloud instance type where the Inference Endpoint will be deployed (e.g. `"c6i"`).
+            min_replica (`int`, *optional*):
+                The minimum number of replicas (instances) to keep running for the Inference Endpoint.
+            max_replica (`int`, *optional*):
+                The maximum number of replicas (instances) to scale to for the Inference Endpoint.
+
+            repository (`str`, *optional*):
+                The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
+            framework (`str`, *optional*):
+                The machine learning framework used for the model (e.g. `"custom"`).
+            revision (`str`, *optional*):
+                The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
+            task (`str`, *optional*):
+                The task on which to deploy the model (e.g. `"text-classification"`).
+        """
+        # Make API call
+        obj = self._api.update_inference_endpoint(
+            name=self.name,
+            namespace=self.namespace,
+            accelerator=accelerator,
+            instance_size=instance_size,
+            instance_type=instance_type,
+            min_replica=min_replica,
+            max_replica=max_replica,
+            repository=repository,
+            framework=framework,
+            revision=revision,
+            task=task,
+            token=self._token,
+        )
+
+        # Mutate current object
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+
+    def pause(self) -> "InferenceEndpoint":
+        """Pause the Inference Endpoint.
+
+        A paused Inference Endpoint will not be charged. It can be resumed at any time using [`InferenceEndpoint.resume`].
+        This is different than scaling the Inference Endpoint to zero with [`InferenceEndpoint.scale_to_zero`], which
+        would be automatically restarted when a request is made to it.
+
+        This is an alias for [`HfApi.pause_inference_endpoint`]. The current object is mutated in place with the
+        latest data from the server.
+        """
+        obj = self._api.pause_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+
+    def resume(self) -> "InferenceEndpoint":
+        """Resume the Inference Endpoint.
+
+        This is an alias for [`HfApi.resume_inference_endpoint`]. The current object is mutated in place with the
+        latest data from the server.
+        """
+        obj = self._api.resume_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+
+    def scale_to_zero(self) -> "InferenceEndpoint":
+        """Scale Inference Endpoint to zero.
+
+        An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a
+        cold start delay. This is different than pausing the Inference Endpoint with [`InferenceEndpoint.pause`], which
+        would require a manual resume with [`InferenceEndpoint.resume`].
+
+        This is an alias for [`HfApi.scale_to_zero_inference_endpoint`]. The current object is mutated in place with the
+        latest data from the server.
+        """
+        obj = self._api.scale_to_zero_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+
+    def delete(self) -> None:
+        """Delete the Inference Endpoint.
+
+        This operation is not reversible. If you don't want to be charged for an Inference Endpoint, it is preferable
+        to pause it with [`InferenceEndpoint.pause`] or scale it to zero with [`InferenceEndpoint.scale_to_zero`].
+
+        This is an alias for [`HfApi.delete_inference_endpoint`].
+        """
+        self._api.delete_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)
+
+    def _populate_from_raw(self) -> None:
+        """Populate fields from raw dictionary.
+
+        Called in __post_init__ + each time the Inference Endpoint is updated.
+        """
+        # Repr fields
+        self.name = self.raw["name"]
+        self.repository = self.raw["model"]["repository"]
+        self.status = self.raw["status"]["state"]
+        self.url = self.raw["status"].get("url")
+
+        # Other fields
+        self.framework = self.raw["model"]["framework"]
+        self.revision = self.raw["model"]["revision"]
+        self.task = self.raw["model"]["task"]
+        self.created_at = parse_datetime(self.raw["status"]["createdAt"])
+        self.updated_at = parse_datetime(self.raw["status"]["updatedAt"])
+        self.type = self.raw["type"]
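The class docstring above already sketches the intended workflow; expanded into a hedged end-to-end sketch (the endpoint name, its task, and the prompt are placeholders, and a usable token is assumed):

    # Lifecycle sketch for the new InferenceEndpoint helper.
    # Assumes an existing text-generation endpoint named "my-endpoint".
    from huggingface_hub import get_inference_endpoint

    endpoint = get_inference_endpoint("my-endpoint")

    # Block until the endpoint is deployed (i.e. has a URL), polling every 5s
    # and raising InferenceEndpointTimeoutError after 5 minutes.
    endpoint.wait(timeout=300, refresh_every=5)

    # `client` is an InferenceClient bound to the endpoint URL;
    # `async_client` is the asyncio equivalent.
    print(endpoint.client.text_generation("The huggingface_hub library is"))

    # Stop being billed when done; pause() requires an explicit resume(),
    # while scale_to_zero() restarts automatically on the next request.
    endpoint.pause()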
huggingface_hub/_login.py
CHANGED
@@ -36,6 +36,14 @@ from .utils import (
 
 logger = logging.get_logger(__name__)
 
+_HF_LOGO_ASCII = """
+        _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
+        _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
+        _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
+        _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
+        _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
+"""
+
 
 def login(
     token: Optional[str] = None,
@@ -144,13 +152,7 @@ def interpreter_login(new_session: bool = True, write_permission: bool = False)
         print("User is already logged in.")
         return
 
-    print("""
-        _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
-        _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
-        _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
-        _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
-        _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
-    """)
+    print(_HF_LOGO_ASCII)
     if HfFolder.get_token() is not None:
         print(
             " A token is already saved on your machine. Run `huggingface-cli"

huggingface_hub/_multi_commits.py
CHANGED
@@ -15,13 +15,13 @@
 """Contains utilities to multi-commits (i.e. push changes iteratively on a PR)."""
 import re
 from dataclasses import dataclass, field
-from hashlib import sha256
 from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Union
 
 from ._commit_api import CommitOperationAdd, CommitOperationDelete
 from .community import DiscussionWithDetails
 from .utils import experimental
 from .utils._cache_manager import _format_size
+from .utils.insecure_hashlib import sha256
 
 
 if TYPE_CHECKING:
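`_multi_commits.py` now takes `sha256` from the new `huggingface_hub/utils/insecure_hashlib.py` module (listed above with +34 lines but not reproduced in this diff). A shim like this typically re-exposes hashlib constructors with `usedforsecurity=False` so that non-cryptographic hashing keeps working on FIPS-restricted Python builds; the following is only a guess at its shape, not the actual file contents:

    # Hypothetical sketch of an "insecure hashlib" shim; the real module is not shown here.
    # On Python 3.9+ the hashlib constructors accept usedforsecurity=False.
    import functools
    import hashlib
    import sys

    if sys.version_info >= (3, 9):
        md5 = functools.partial(hashlib.md5, usedforsecurity=False)
        sha1 = functools.partial(hashlib.sha1, usedforsecurity=False)
        sha256 = functools.partial(hashlib.sha256, usedforsecurity=False)
    else:
        md5 = hashlib.md5
        sha1 = hashlib.sha1
        sha256 = hashlib.sha256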

huggingface_hub/_snapshot_download.py
CHANGED
@@ -6,9 +6,10 @@ from tqdm.auto import tqdm as base_tqdm
 from tqdm.contrib.concurrent import thread_map
 
 from .constants import (
+    DEFAULT_ETAG_TIMEOUT,
     DEFAULT_REVISION,
+    HF_HUB_CACHE,
     HF_HUB_ENABLE_HF_TRANSFER,
-    HUGGINGFACE_HUB_CACHE,
     REPO_TYPES,
 )
 from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
@@ -26,7 +27,6 @@ def snapshot_download(
     *,
     repo_type: Optional[str] = None,
     revision: Optional[str] = None,
-    endpoint: Optional[str] = None,
     cache_dir: Union[str, Path, None] = None,
     local_dir: Union[str, Path, None] = None,
     local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
@@ -34,7 +34,7 @@ def snapshot_download(
     library_version: Optional[str] = None,
     user_agent: Optional[Union[Dict, str]] = None,
     proxies: Optional[Dict] = None,
-    etag_timeout: float =
+    etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
     resume_download: bool = False,
     force_download: bool = False,
     token: Optional[Union[bool, str]] = None,
@@ -43,6 +43,7 @@ def snapshot_download(
     ignore_patterns: Optional[Union[List[str], str]] = None,
     max_workers: int = 8,
     tqdm_class: Optional[base_tqdm] = None,
+    endpoint: Optional[str] = None,
 ) -> str:
     """Download repo files.
 
@@ -78,9 +79,6 @@ def snapshot_download(
         revision (`str`, *optional*):
             An optional Git revision id which can be a branch name, a tag, or a
            commit hash.
-        endpoint (`str`, *optional*):
-            Hugging Face Hub base url. Will default to https://huggingface.co/. Otherwise, one can set the `HF_ENDPOINT`
-            environment variable.
         cache_dir (`str`, `Path`, *optional*):
             Path to the folder where cached files are stored.
         local_dir (`str` or `Path`, *optional*):
@@ -146,7 +144,7 @@ def snapshot_download(
     </Tip>
     """
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     if revision is None:
        revision = DEFAULT_REVISION
     if isinstance(cache_dir, Path):
@@ -188,6 +186,7 @@ def snapshot_download(
     api = HfApi(library_name=library_name, library_version=library_version, user_agent=user_agent, endpoint=endpoint)
     repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision, token=token)
     assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
+    assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."
 
     filtered_repo_files = list(
         filter_repo_objects(
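In `snapshot_download`, the hard-coded defaults are replaced by the new `DEFAULT_ETAG_TIMEOUT` and `HF_HUB_CACHE` constants, the `endpoint` parameter moves to the end of the keyword-only signature, and a `siblings` assertion joins the existing `sha` check. A hedged call sketch (the repo id mirrors the `"gpt2"` example used in the library's docstrings; the endpoint URL is just the default Hub):

    # Sketch of a snapshot_download call; everything after repo_id stays keyword-only.
    from huggingface_hub import snapshot_download

    local_path = snapshot_download(
        "gpt2",
        revision="main",
        endpoint="https://huggingface.co",  # still accepted, now last in the signature
    )
    print(local_path)  # path of the cached snapshot folder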
huggingface_hub/_space_api.py
CHANGED
@@ -61,8 +61,11 @@ class SpaceHardware(str, Enum):
     CPU_UPGRADE = "cpu-upgrade"
     T4_SMALL = "t4-small"
     T4_MEDIUM = "t4-medium"
+    ZERO_A10G = "zero-a10g"
     A10G_SMALL = "a10g-small"
     A10G_LARGE = "a10g-large"
+    A10G_LARGEX2 = "a10g-largex2"
+    A10G_LARGEX4 = "a10g-largex4"
     A100_LARGE = "a100-large"
 
 
@@ -116,10 +119,10 @@ class SpaceRuntime:
 
     def __init__(self, data: Dict) -> None:
         self.stage = data["stage"]
-        self.hardware = data
-        self.requested_hardware = data
-        self.sleep_time = data
-        self.storage = data
+        self.hardware = data.get("hardware", {}).get("current")
+        self.requested_hardware = data.get("hardware", {}).get("requested")
+        self.sleep_time = data.get("gcTimeout")
+        self.storage = data.get("storage")
         self.raw = data
 
 
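`SpaceRuntime.__init__` now reads the hardware, requested hardware, sleep time, and storage fields with `.get()`, so runtime payloads that omit those keys fall back to `None` instead of failing. A small illustration with a made-up payload:

    # Made-up runtime payload that omits the optional "gcTimeout" and "storage" keys.
    from huggingface_hub import SpaceRuntime

    data = {
        "stage": "RUNNING",
        "hardware": {"current": "cpu-basic", "requested": "t4-small"},
    }
    runtime = SpaceRuntime(data)
    print(runtime.hardware)            # "cpu-basic"
    print(runtime.requested_hardware)  # "t4-small"
    print(runtime.sleep_time)          # None instead of a KeyError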