huggingface-hub 0.36.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +33 -45
- huggingface_hub/_commit_api.py +39 -43
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +17 -43
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +135 -50
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +18 -32
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +143 -39
- huggingface_hub/cli/auth.py +105 -171
- huggingface_hub/cli/cache.py +594 -361
- huggingface_hub/cli/download.py +120 -112
- huggingface_hub/cli/hf.py +38 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +282 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -220
- huggingface_hub/cli/upload_large_folder.py +91 -106
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +17 -52
- huggingface_hub/dataclasses.py +135 -21
- huggingface_hub/errors.py +47 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +351 -303
- huggingface_hub/hf_api.py +398 -570
- huggingface_hub/hf_file_system.py +101 -66
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +177 -162
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +218 -258
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +12 -4
- huggingface_hub/inference/_providers/_common.py +62 -24
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +21 -94
- huggingface_hub/repocard.py +15 -16
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +11 -6
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +49 -74
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +371 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +59 -23
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
- huggingface_hub-1.0.0.dist-info/RECORD +152 -0
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -204
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -161
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -208
- huggingface_hub/commands/version.py +0 -40
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -497
- huggingface_hub/repository.py +0 -1471
- huggingface_hub/serialization/_tensorflow.py +0 -92
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.36.0.dist-info/RECORD +0 -170
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
huggingface_hub/_inference_endpoints.py
CHANGED

@@ -2,7 +2,7 @@ import time
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, Optional, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
 
@@ -62,7 +62,7 @@ class InferenceEndpoint:
             The timestamp of the last update of the Inference Endpoint.
         type ([`InferenceEndpointType`]):
             The type of the Inference Endpoint (public, protected, private).
-        raw (`Dict`):
+        raw (`dict`):
             The raw dictionary data returned from the API.
         token (`str` or `bool`, *optional*):
             Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the
@@ -112,7 +112,7 @@ class InferenceEndpoint:
     type: InferenceEndpointType = field(repr=False, init=False)
 
     # Raw dict from the API
-    raw: Dict = field(repr=False)
+    raw: dict = field(repr=False)
 
     # Internal fields
     _token: Union[str, bool, None] = field(repr=False, compare=False)
@@ -120,7 +120,7 @@ class InferenceEndpoint:
 
     @classmethod
     def from_raw(
-        cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
+        cls, raw: dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
     ) -> "InferenceEndpoint":
         """Initialize object from raw dictionary."""
         if api is None:
@@ -260,8 +260,8 @@ class InferenceEndpoint:
         framework: Optional[str] = None,
         revision: Optional[str] = None,
         task: Optional[str] = None,
-        custom_image: Optional[Dict] = None,
-        secrets: Optional[Dict[str, str]] = None,
+        custom_image: Optional[dict] = None,
+        secrets: Optional[dict[str, str]] = None,
     ) -> "InferenceEndpoint":
         """Update the Inference Endpoint.
 
@@ -293,10 +293,10 @@ class InferenceEndpoint:
                 The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
             task (`str`, *optional*):
                 The task on which to deploy the model (e.g. `"text-classification"`).
-            custom_image (`Dict`, *optional*):
+            custom_image (`dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
-            secrets (`Dict[str, str]`, *optional*):
+            secrets (`dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
         Returns:
             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.

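From a caller's perspective the `Dict` to `dict` modernization above changes nothing: plain dictionaries are accepted as before. A minimal sketch of the updated `update()` signature in use (endpoint name, namespace, and image fields are illustrative, not taken from this diff):

from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("my-endpoint", namespace="my-org")  # hypothetical names
endpoint.update(
    custom_image={  # plain dict, matching the new `Optional[dict]` annotation
        "url": "ghcr.io/huggingface/text-generation-inference:latest",  # illustrative image
        "health_route": "/health",
        "port": 80,
    },
    secrets={"HF_TOKEN": "hf_..."},  # `dict[str, str]`, injected into the container env
)
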
huggingface_hub/_jobs_api.py
CHANGED

@@ -15,7 +15,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union
 
 from huggingface_hub import constants
 from huggingface_hub._space_api import SpaceHardware
@@ -71,13 +71,13 @@ class JobInfo:
         space_id (`str` or `None`):
             The Docker image from Hugging Face Spaces used for the Job.
             Can be None if docker_image is present instead.
-        command (`List[str]` or `None`):
+        command (`list[str]` or `None`):
             Command of the Job, e.g. `["python", "-c", "print('hello world')"]`
-        arguments (`List[str]` or `None`):
+        arguments (`list[str]` or `None`):
             Arguments passed to the command
-        environment (`Dict[str]` or `None`):
+        environment (`dict[str]` or `None`):
             Environment variables of the Job as a dictionary.
-        secrets (`Dict[str]` or `None`):
+        secrets (`dict[str]` or `None`):
             Secret environment variables of the Job (encrypted).
         flavor (`str` or `None`):
             Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
@@ -111,10 +111,10 @@ class JobInfo:
     created_at: Optional[datetime]
     docker_image: Optional[str]
     space_id: Optional[str]
-    command: Optional[List[str]]
-    arguments: Optional[List[str]]
-    environment: Optional[Dict[str, Any]]
-    secrets: Optional[Dict[str, Any]]
+    command: Optional[list[str]]
+    arguments: Optional[list[str]]
+    environment: Optional[dict[str, Any]]
+    secrets: Optional[dict[str, Any]]
     flavor: Optional[SpaceHardware]
     status: JobStatus
    owner: JobOwner
@@ -148,13 +148,13 @@ class JobInfo:
 class JobSpec:
     docker_image: Optional[str]
     space_id: Optional[str]
-    command: Optional[List[str]]
-    arguments: Optional[List[str]]
-    environment: Optional[Dict[str, Any]]
-    secrets: Optional[Dict[str, Any]]
+    command: Optional[list[str]]
+    arguments: Optional[list[str]]
+    environment: Optional[dict[str, Any]]
+    secrets: Optional[dict[str, Any]]
     flavor: Optional[SpaceHardware]
     timeout: Optional[int]
-    tags: Optional[List[str]]
+    tags: Optional[list[str]]
     arch: Optional[str]
 
     def __init__(self, **kwargs) -> None:
@@ -202,7 +202,7 @@ class ScheduledJobInfo:
         Scheduled Job ID.
     created_at (`datetime` or `None`):
         When the scheduled Job was created.
-    tags (`List[str]` or `None`):
+    tags (`list[str]` or `None`):
         The tags of the scheduled Job.
     schedule (`str` or `None`):
         One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
@@ -263,14 +263,14 @@ class ScheduledJobInfo:
 def _create_job_spec(
     *,
     image: str,
-    command: List[str],
-    env: Optional[Dict[str, Any]],
-    secrets: Optional[Dict[str, Any]],
+    command: list[str],
+    env: Optional[dict[str, Any]],
+    secrets: Optional[dict[str, Any]],
     flavor: Optional[SpaceHardware],
     timeout: Optional[Union[int, float, str]],
-) -> Dict[str, Any]:
+) -> dict[str, Any]:
     # prepare job spec to send to HF Jobs API
-    job_spec: Dict[str, Any] = {
+    job_spec: dict[str, Any] = {
         "command": command,
         "arguments": [],
         "environment": env or {},

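For reference, a sketch of the payload shape `_create_job_spec` assembles, matching the keys visible in the hunk above (the values are illustrative, and the real function also fills fields such as flavor and timeout that the hunk truncates):

from typing import Any

job_spec: dict[str, Any] = {  # builtin generics, as annotated in the diff above
    "command": ["python", "-c", "print('hello world')"],
    "arguments": [],
    "environment": {"LOG_LEVEL": "debug"},  # illustrative env var
}
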
huggingface_hub/_login.py
CHANGED

@@ -19,9 +19,11 @@ from getpass import getpass
 from pathlib import Path
 from typing import Optional
 
+import typer
+
 from . import constants
-from .commands._cli_utils import ANSI
 from .utils import (
+    ANSI,
     capture_output,
     get_token,
     is_google_colab,
@@ -41,7 +43,6 @@ from .utils._auth import (
     _save_token,
     get_stored_tokens,
 )
-from .utils._deprecation import _deprecate_arguments, _deprecate_positional_args
 
 
 logger = logging.get_logger(__name__)
@@ -55,18 +56,11 @@ _HF_LOGO_ASCII = """
 """
 
 
-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
 def login(
     token: Optional[str] = None,
     *,
     add_to_git_credential: bool = False,
-    new_session: bool = True,
-    write_permission: bool = False,
+    skip_if_logged_in: bool = False,
 ) -> None:
     """Login the machine to access the Hub.
 
@@ -96,10 +90,8 @@ def login(
             is configured, a warning will be displayed to the user. If `token` is `None`,
             the value of `add_to_git_credential` is ignored and will be prompted again
             to the end user.
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     Raises:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If an organization token is passed. Only personal account tokens are valid
@@ -119,9 +111,9 @@ def login(
         )
         _login(token, add_to_git_credential=add_to_git_credential)
     elif is_notebook():
-        notebook_login(new_session=new_session, write_permission=write_permission)
+        notebook_login(skip_if_logged_in=skip_if_logged_in)
     else:
-        interpreter_login(new_session=new_session, write_permission=write_permission)
+        interpreter_login(skip_if_logged_in=skip_if_logged_in)
 
 
 def logout(token_name: Optional[str] = None) -> None:
@@ -236,13 +228,7 @@ def auth_list() -> None:
 ###
 
 
-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
-def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+def interpreter_login(*, skip_if_logged_in: bool = False) -> None:
     """
     Displays a prompt to log in to the HF website and store the token.
 
@@ -253,17 +239,13 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
     For more details, see [`login`].
 
     Args:
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     """
-    if not new_session and get_token() is not None:
+    if not skip_if_logged_in and get_token() is not None:
         logger.info("User is already logged in.")
         return
 
-    from .commands.delete_cache import _ask_for_confirmation_no_tui
-
     print(_HF_LOGO_ASCII)
     if get_token() is not None:
         logger.info(
@@ -279,7 +261,7 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
     if os.name == "nt":
         logger.info("Token can be pasted using 'Right-Click'.")
     token = getpass("Enter your token (input will not be visible): ")
-    add_to_git_credential = _ask_for_confirmation_no_tui("Add token as git credential?")
+    add_to_git_credential = typer.confirm("Add token as git credential?")
 
     _login(token=token, add_to_git_credential=add_to_git_credential)
 
@@ -308,13 +290,7 @@ NOTEBOOK_LOGIN_TOKEN_HTML_END = """
 notebooks. </center>"""
 
 
-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
-def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+def notebook_login(*, skip_if_logged_in: bool = False) -> None:
     """
     Displays a widget to log in to the HF website and store the token.
 
@@ -325,10 +301,8 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
     For more details, see [`login`].
 
     Args:
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     """
     try:
        import ipywidgets.widgets as widgets  # type: ignore
@@ -338,7 +312,7 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
             "The `notebook_login` function can only be used in a notebook (Jupyter or"
             " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`."
         )
-    if not new_session and get_token() is not None:
+    if not skip_if_logged_in and get_token() is not None:
         logger.info("User is already logged in.")
         return
 

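The removed `new_session`/`write_permission` pair is replaced by a single `skip_if_logged_in` flag. Per the new docstring, the closest equivalent of the old `new_session=False` is:

from huggingface_hub import login

# Does nothing if a token is already stored; otherwise prompts
# (or shows the notebook widget when running in a notebook).
login(skip_if_logged_in=True)
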
huggingface_hub/_oauth.py
CHANGED

@@ -6,7 +6,7 @@ import time
 import urllib.parse
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Literal, Optional, Union
 
 from . import constants
 from .hf_api import whoami
@@ -39,7 +39,7 @@ class OAuthOrgInfo:
             Whether the org has a payment method set up. Hugging Face field.
         role_in_org (`Optional[str]`, *optional*):
             The user's role in the org. Hugging Face field.
-        security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
+        security_restrictions (`Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
             Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
     """
 
@@ -50,7 +50,7 @@ class OAuthOrgInfo:
     is_enterprise: bool
     can_pay: Optional[bool] = None
     role_in_org: Optional[str] = None
-    security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None
+    security_restrictions: Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]] = None
 
 
 @dataclass
@@ -79,7 +79,7 @@ class OAuthUserInfo:
             Whether the user is a pro user. Hugging Face field.
         can_pay (`Optional[bool]`, *optional*):
             Whether the user has a payment method set up. Hugging Face field.
-        orgs (`Optional[List[OrgInfo]]`, *optional*):
+        orgs (`Optional[list[OrgInfo]]`, *optional*):
             List of organizations the user is part of. Hugging Face field.
     """
 
@@ -93,7 +93,7 @@ class OAuthUserInfo:
     website: Optional[str]
     is_pro: bool
     can_pay: Optional[bool]
-    orgs: Optional[List[OAuthOrgInfo]]
+    orgs: Optional[list[OAuthOrgInfo]]
 
 
 @dataclass
@@ -306,7 +306,7 @@ def _add_oauth_routes(app: "fastapi.FastAPI", route_prefix: str) -> None:
         target_url = request.query_params.get("_target_url")
 
         # Build redirect URI with the same query params as before and bump nb_redirects count
-        query_params: Dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1}
+        query_params: dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1}
         if target_url:
             query_params["_target_url"] = target_url
 
@@ -406,7 +406,7 @@ def _get_redirect_target(request: "fastapi.Request", default_target: str = "/")
     return request.query_params.get("_target_url", default_target)
 
 
-def _get_mocked_oauth_info() -> Dict:
+def _get_mocked_oauth_info() -> dict:
     token = get_token()
     if token is None:
         raise ValueError(
@@ -449,7 +449,7 @@ def _get_mocked_oauth_info() -> Dict:
     }
 
 
-def _get_oauth_uris(route_prefix: str = "/") -> Tuple[str, str, str]:
+def _get_oauth_uris(route_prefix: str = "/") -> tuple[str, str, str]:
     route_prefix = route_prefix.strip("/")
     if route_prefix:
         route_prefix = f"/{route_prefix}"

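These dataclasses back the FastAPI OAuth helpers; only the annotations changed. Assuming the public `attach_huggingface_oauth` helper (which wires up the `_add_oauth_routes` function touched above) keeps its signature, usage is unchanged:

import fastapi

# Assumption: `attach_huggingface_oauth` is the public entry point that
# registers the OAuth login/callback/logout routes shown in this file.
from huggingface_hub import attach_huggingface_oauth

app = fastapi.FastAPI()
attach_huggingface_oauth(app)
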
huggingface_hub/_snapshot_download.py
CHANGED

@@ -1,20 +1,21 @@
 import os
 from pathlib import Path
-from typing import Dict, List, Literal, Optional, Type, Union
+from typing import Iterable, List, Literal, Optional, Union, overload
 
-import requests
+import httpx
 from tqdm.auto import tqdm as base_tqdm
 from tqdm.contrib.concurrent import thread_map
 
 from . import constants
 from .errors import (
+    DryRunError,
     GatedRepoError,
     HfHubHTTPError,
     LocalEntryNotFoundError,
     RepositoryNotFoundError,
     RevisionNotFoundError,
 )
-from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
+from .file_download import REGEX_COMMIT_HASH, DryRunFileInfo, hf_hub_download, repo_folder_name
 from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo
 from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
 from .utils import tqdm as hf_tqdm
@@ -25,6 +26,81 @@ logger = logging.get_logger(__name__)
 VERY_LARGE_REPO_THRESHOLD = 50000  # After this limit, we don't consider `repo_info.siblings` to be reliable enough
 
 
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[False] = False,
+) -> str: ...
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[True] = True,
+) -> list[DryRunFileInfo]: ...
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: bool = False,
+) -> Union[str, list[DryRunFileInfo]]: ...
+
+
 @validate_hf_hub_args
 def snapshot_download(
     repo_id: str,
@@ -35,22 +111,19 @@ def snapshot_download(
     local_dir: Union[str, Path, None] = None,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
-    user_agent: Optional[Union[Dict, str]] = None,
-    proxies: Optional[Dict] = None,
+    user_agent: Optional[Union[dict, str]] = None,
     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
     force_download: bool = False,
     token: Optional[Union[bool, str]] = None,
     local_files_only: bool = False,
-    allow_patterns: Optional[Union[List[str], str]] = None,
-    ignore_patterns: Optional[Union[List[str], str]] = None,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
     max_workers: int = 8,
-    tqdm_class: Optional[Type[base_tqdm]] = None,
-    headers: Optional[Dict[str, str]] = None,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-    # Deprecated args
-    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
-    resume_download: Optional[bool] = None,
-) -> str:
+    dry_run: bool = False,
+) -> Union[str, list[DryRunFileInfo]]:
     """Download repo files.
 
     Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
@@ -85,12 +158,9 @@ def snapshot_download(
             The version of the library.
         user_agent (`str`, `dict`, *optional*):
             The user-agent info in the form of a dictionary or a string.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         etag_timeout (`float`, *optional*, defaults to `10`):
             When fetching ETag, how many seconds to wait for the server to send
-            data before giving up which is passed to `requests.request`.
+            data before giving up which is passed to `httpx.request`.
         force_download (`bool`, *optional*, defaults to `False`):
             Whether the file should be downloaded even if it already exists in the local cache.
         token (`str`, `bool`, *optional*):
@@ -103,9 +173,9 @@ def snapshot_download(
         local_files_only (`bool`, *optional*, defaults to `False`):
             If `True`, avoid downloading the file and return the path to the
             local cached file if it exists.
-        allow_patterns (`List[str]` or `str`, *optional*):
+        allow_patterns (`list[str]` or `str`, *optional*):
             If provided, only files matching at least one pattern are downloaded.
-        ignore_patterns (`List[str]` or `str`, *optional*):
+        ignore_patterns (`list[str]` or `str`, *optional*):
             If provided, files matching any of the patterns are not downloaded.
         max_workers (`int`, *optional*):
             Number of concurrent threads to download files (1 thread = 1 file download).
@@ -116,9 +186,14 @@ def snapshot_download(
             Note that the `tqdm_class` is not passed to each individual download.
             Defaults to the custom HF progress bar that can be disabled by setting
             `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
+        dry_run (`bool`, *optional*, defaults to `False`):
+            If `True`, perform a dry run without actually downloading the files. Returns a list of
+            [`DryRunFileInfo`] objects containing information about what would be downloaded.
 
     Returns:
-        `str`: folder path of the repo snapshot.
+        `str` or list of [`DryRunFileInfo`]:
+            - If `dry_run=False`: Local snapshot path.
+            - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information.
 
     Raises:
         [`~utils.RepositoryNotFoundError`]
@@ -163,14 +238,10 @@ def snapshot_download(
     try:
         # if we have internet connection we want to list files to download
        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision)
-    except requests.exceptions.ProxyError:
-        # Actually raise for those subclasses of ConnectionError
+    except httpx.ProxyError:
+        # Actually raise on proxy error
         raise
-    except (
-        requests.exceptions.ConnectionError,
-        requests.exceptions.Timeout,
-        OfflineModeIsEnabled,
-    ) as error:
+    except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
         # Internet connection is down
         # => will try to use local files only
         api_call_error = error
@@ -178,7 +249,7 @@ def snapshot_download(
     except RevisionNotFoundError:
         # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted)
         raise
-    except requests.HTTPError as error:
+    except HfHubHTTPError as error:
         # Multiple reasons for an http error:
         #     - Repository is private and invalid/missing token sent
         #     - Repository is gated and invalid/missing token sent
@@ -198,6 +269,11 @@ def snapshot_download(
     #  - f the specified revision is a branch or tag, look inside "refs".
     # => if local_dir is not None, we will return the path to the local folder if it exists.
     if repo_info is None:
+        if dry_run:
+            raise DryRunError(
+                "Dry run cannot be performed as the repository cannot be accessed. Please check your internet connection or authentication token."
+            ) from api_call_error
+
         # Try to get which commit hash corresponds to the specified revision
         commit_hash = None
         if REGEX_COMMIT_HASH.match(revision):
@@ -284,6 +360,8 @@ def snapshot_download(
         tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
     else:
         tqdm_desc = "Fetching ... files"
+    if dry_run:
+        tqdm_desc = "[dry-run] " + tqdm_desc
 
     commit_hash = repo_info.sha
     snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
@@ -299,33 +377,36 @@ def snapshot_download(
     except OSError as e:
         logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.")
 
+    results: List[Union[str, DryRunFileInfo]] = []
+
     # we pass the commit_hash to hf_hub_download
     # so no network call happens if we already
     # have the file locally.
-    def _inner_hf_hub_download(repo_file: str):
-        return hf_hub_download(
-            repo_id,
-            filename=repo_file,
-            repo_type=repo_type,
-            revision=commit_hash,
-            endpoint=endpoint,
-            cache_dir=cache_dir,
-            local_dir=local_dir,
-            library_name=library_name,
-            library_version=library_version,
-            user_agent=user_agent,
-            proxies=proxies,
-            etag_timeout=etag_timeout,
-            force_download=force_download,
-            token=token,
-            headers=headers,
-            local_dir_use_symlinks=local_dir_use_symlinks,
-            resume_download=resume_download,
+    def _inner_hf_hub_download(repo_file: str) -> None:
+        results.append(
+            hf_hub_download(  # type: ignore[no-matching-overload] # ty not happy, don't know why :/
+                repo_id,
+                filename=repo_file,
+                repo_type=repo_type,
+                revision=commit_hash,
+                endpoint=endpoint,
+                cache_dir=cache_dir,
+                local_dir=local_dir,
+                library_name=library_name,
+                library_version=library_version,
+                user_agent=user_agent,
+                etag_timeout=etag_timeout,
+                force_download=force_download,
+                token=token,
+                headers=headers,
+                dry_run=dry_run,
+            )
         )
 
-    if constants.HF_HUB_ENABLE_HF_TRANSFER:
-        # when using HF_TRANSFER we don't want extra parallelism
-        # from the one hf_transfer provides
+    if constants.HF_XET_HIGH_PERFORMANCE and not dry_run:
+        # when using hf_xet high performance we don't want extra parallelism
+        # from the one hf_xet provides
+        # TODO: revisit this when xet_session is implemented
         for file in filtered_repo_files:
             _inner_hf_hub_download(file)
     else:
@@ -338,6 +419,10 @@ def snapshot_download(
             tqdm_class=tqdm_class or hf_tqdm,
         )
 
+    if dry_run:
+        assert all(isinstance(r, DryRunFileInfo) for r in results)
+        return results  # type: ignore
+
     if local_dir is not None:
         return str(os.path.realpath(local_dir))
     return snapshot_folder