huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +33 -45
- huggingface_hub/_commit_api.py +39 -43
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +17 -43
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +135 -50
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +18 -32
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +143 -39
- huggingface_hub/cli/auth.py +105 -171
- huggingface_hub/cli/cache.py +594 -361
- huggingface_hub/cli/download.py +120 -112
- huggingface_hub/cli/hf.py +38 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +282 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -220
- huggingface_hub/cli/upload_large_folder.py +91 -106
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +17 -52
- huggingface_hub/dataclasses.py +135 -21
- huggingface_hub/errors.py +47 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +351 -303
- huggingface_hub/hf_api.py +398 -570
- huggingface_hub/hf_file_system.py +101 -66
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +177 -162
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +218 -258
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +12 -4
- huggingface_hub/inference/_providers/_common.py +62 -24
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +21 -94
- huggingface_hub/repocard.py +15 -16
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +11 -6
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +49 -74
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +371 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +59 -23
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
- huggingface_hub-1.0.0.dist-info/RECORD +152 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -204
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -161
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -208
- huggingface_hub/commands/version.py +0 -40
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -497
- huggingface_hub/repository.py +0 -1471
- huggingface_hub/serialization/_tensorflow.py +0 -92
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -15,118 +15,103 @@
|
|
|
15
15
|
"""Contains command to upload a large folder with the CLI."""
|
|
16
16
|
|
|
17
17
|
import os
|
|
18
|
-
from
|
|
19
|
-
|
|
18
|
+
from typing import Annotated, Optional
|
|
19
|
+
|
|
20
|
+
import typer
|
|
20
21
|
|
|
21
22
|
from huggingface_hub import logging
|
|
22
|
-
from huggingface_hub.
|
|
23
|
-
from huggingface_hub.hf_api import HfApi
|
|
24
|
-
from huggingface_hub.utils import disable_progress_bars
|
|
23
|
+
from huggingface_hub.utils import ANSI, disable_progress_bars
|
|
25
24
|
|
|
26
|
-
from ._cli_utils import
|
|
25
|
+
from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
|
|
27
26
|
|
|
28
27
|
|
|
29
28
|
logger = logging.get_logger(__name__)
|
|
30
29
|
|
|
31
30
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
help="
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
help="
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
" - Do not start several processes in parallel.\n"
|
|
102
|
-
" - You can interrupt and resume the process at any time. "
|
|
103
|
-
"The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.\n"
|
|
104
|
-
" - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.\n"
|
|
105
|
-
"\n"
|
|
106
|
-
f"Some temporary metadata will be stored under `{self.local_path}/.cache/huggingface`.\n"
|
|
107
|
-
" - You must not modify those files manually.\n"
|
|
108
|
-
" - You must not delete the `./.cache/huggingface/` folder while a process is running.\n"
|
|
109
|
-
" - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.\n"
|
|
110
|
-
"\n"
|
|
111
|
-
"If the process output is too verbose, you can disable the progress bars with `--no-bars`. "
|
|
112
|
-
"You can also entirely disable the status report with `--no-report`.\n"
|
|
113
|
-
"\n"
|
|
114
|
-
"For more details, run `hf upload-large-folder --help` or check the documentation at "
|
|
115
|
-
"https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder."
|
|
116
|
-
)
|
|
117
|
-
)
|
|
118
|
-
|
|
119
|
-
if self.no_bars:
|
|
120
|
-
disable_progress_bars()
|
|
121
|
-
|
|
122
|
-
self.api.upload_large_folder(
|
|
123
|
-
repo_id=self.repo_id,
|
|
124
|
-
folder_path=self.local_path,
|
|
125
|
-
repo_type=self.repo_type,
|
|
126
|
-
revision=self.revision,
|
|
127
|
-
private=self.private,
|
|
128
|
-
allow_patterns=self.include,
|
|
129
|
-
ignore_patterns=self.exclude,
|
|
130
|
-
num_workers=self.num_workers,
|
|
131
|
-
print_report=not self.no_report,
|
|
31
|
+
def upload_large_folder(
|
|
32
|
+
repo_id: RepoIdArg,
|
|
33
|
+
local_path: Annotated[
|
|
34
|
+
str,
|
|
35
|
+
typer.Argument(
|
|
36
|
+
help="Local path to the folder to upload.",
|
|
37
|
+
),
|
|
38
|
+
],
|
|
39
|
+
repo_type: RepoTypeOpt = RepoType.model,
|
|
40
|
+
revision: RevisionOpt = None,
|
|
41
|
+
private: PrivateOpt = False,
|
|
42
|
+
include: Annotated[
|
|
43
|
+
Optional[list[str]],
|
|
44
|
+
typer.Option(
|
|
45
|
+
help="Glob patterns to match files to upload.",
|
|
46
|
+
),
|
|
47
|
+
] = None,
|
|
48
|
+
exclude: Annotated[
|
|
49
|
+
Optional[list[str]],
|
|
50
|
+
typer.Option(
|
|
51
|
+
help="Glob patterns to exclude from files to upload.",
|
|
52
|
+
),
|
|
53
|
+
] = None,
|
|
54
|
+
token: TokenOpt = None,
|
|
55
|
+
num_workers: Annotated[
|
|
56
|
+
Optional[int],
|
|
57
|
+
typer.Option(
|
|
58
|
+
help="Number of workers to use to hash, upload and commit files.",
|
|
59
|
+
),
|
|
60
|
+
] = None,
|
|
61
|
+
no_report: Annotated[
|
|
62
|
+
bool,
|
|
63
|
+
typer.Option(
|
|
64
|
+
help="Whether to disable regular status report.",
|
|
65
|
+
),
|
|
66
|
+
] = False,
|
|
67
|
+
no_bars: Annotated[
|
|
68
|
+
bool,
|
|
69
|
+
typer.Option(
|
|
70
|
+
help="Whether to disable progress bars.",
|
|
71
|
+
),
|
|
72
|
+
] = False,
|
|
73
|
+
) -> None:
|
|
74
|
+
"""Upload a large folder to the Hub. Recommended for resumable uploads."""
|
|
75
|
+
if not os.path.isdir(local_path):
|
|
76
|
+
raise typer.BadParameter("Large upload is only supported for folders.", param_hint="local_path")
|
|
77
|
+
|
|
78
|
+
print(
|
|
79
|
+
ANSI.yellow(
|
|
80
|
+
"You are about to upload a large folder to the Hub using `hf upload-large-folder`. "
|
|
81
|
+
"This is a new feature so feedback is very welcome!\n"
|
|
82
|
+
"\n"
|
|
83
|
+
"A few things to keep in mind:\n"
|
|
84
|
+
" - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations\n"
|
|
85
|
+
" - Do not start several processes in parallel.\n"
|
|
86
|
+
" - You can interrupt and resume the process at any time. "
|
|
87
|
+
"The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.\n"
|
|
88
|
+
" - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.\n"
|
|
89
|
+
"\n"
|
|
90
|
+
f"Some temporary metadata will be stored under `{local_path}/.cache/huggingface`.\n"
|
|
91
|
+
" - You must not modify those files manually.\n"
|
|
92
|
+
" - You must not delete the `./.cache/huggingface/` folder while a process is running.\n"
|
|
93
|
+
" - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.\n"
|
|
94
|
+
"\n"
|
|
95
|
+
"If the process output is too verbose, you can disable the progress bars with `--no-bars`. "
|
|
96
|
+
"You can also entirely disable the status report with `--no-report`.\n"
|
|
97
|
+
"\n"
|
|
98
|
+
"For more details, run `hf upload-large-folder --help` or check the documentation at "
|
|
99
|
+
"https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder."
|
|
132
100
|
)
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
if no_bars:
|
|
104
|
+
disable_progress_bars()
|
|
105
|
+
|
|
106
|
+
api = get_hf_api(token=token)
|
|
107
|
+
api.upload_large_folder(
|
|
108
|
+
repo_id=repo_id,
|
|
109
|
+
folder_path=local_path,
|
|
110
|
+
repo_type=repo_type.value,
|
|
111
|
+
revision=revision,
|
|
112
|
+
private=private,
|
|
113
|
+
allow_patterns=include,
|
|
114
|
+
ignore_patterns=exclude,
|
|
115
|
+
num_workers=num_workers,
|
|
116
|
+
print_report=not no_report,
|
|
117
|
+
)
|
huggingface_hub/community.py
CHANGED
|
@@ -7,7 +7,7 @@ for more information on Pull Requests, Discussions, and the community tab.
|
|
|
7
7
|
|
|
8
8
|
from dataclasses import dataclass
|
|
9
9
|
from datetime import datetime
|
|
10
|
-
from typing import
|
|
10
|
+
from typing import Literal, Optional, TypedDict, Union
|
|
11
11
|
|
|
12
12
|
from . import constants
|
|
13
13
|
from .utils import parse_datetime
|
|
@@ -116,7 +116,7 @@ class DiscussionWithDetails(Discussion):
|
|
|
116
116
|
The `datetime` of creation of the Discussion / Pull Request.
|
|
117
117
|
events (`list` of [`DiscussionEvent`])
|
|
118
118
|
The list of [`DiscussionEvents`] in this Discussion or Pull Request.
|
|
119
|
-
conflicting_files (`Union[
|
|
119
|
+
conflicting_files (`Union[list[str], bool, None]`, *optional*):
|
|
120
120
|
A list of conflicting files if this is a Pull Request.
|
|
121
121
|
`None` if `self.is_pull_request` is `False`.
|
|
122
122
|
`True` if there are conflicting files but the list can't be retrieved.
|
|
@@ -136,8 +136,8 @@ class DiscussionWithDetails(Discussion):
|
|
|
136
136
|
(property) URL of the discussion on the Hub.
|
|
137
137
|
"""
|
|
138
138
|
|
|
139
|
-
events:
|
|
140
|
-
conflicting_files: Union[
|
|
139
|
+
events: list["DiscussionEvent"]
|
|
140
|
+
conflicting_files: Union[list[str], bool, None]
|
|
141
141
|
target_branch: Optional[str]
|
|
142
142
|
merge_commit_oid: Optional[str]
|
|
143
143
|
diff: Optional[str]
|
|
@@ -230,7 +230,7 @@ class DiscussionComment(DiscussionEvent):
|
|
|
230
230
|
return self._event["data"]["latest"].get("author", {}).get("name", "deleted")
|
|
231
231
|
|
|
232
232
|
@property
|
|
233
|
-
def edit_history(self) ->
|
|
233
|
+
def edit_history(self) -> list[dict]:
|
|
234
234
|
"""The edit history of the comment"""
|
|
235
235
|
return self._event["data"]["history"]
|
|
236
236
|
|
huggingface_hub/constants.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import re
|
|
3
3
|
import typing
|
|
4
|
-
from typing import Literal, Optional
|
|
4
|
+
from typing import Literal, Optional
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
# Possible values for env variables
|
|
@@ -35,7 +35,6 @@ DEFAULT_ETAG_TIMEOUT = 10
|
|
|
35
35
|
DEFAULT_DOWNLOAD_TIMEOUT = 10
|
|
36
36
|
DEFAULT_REQUEST_TIMEOUT = 10
|
|
37
37
|
DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
|
|
38
|
-
HF_TRANSFER_CONCURRENCY = 100
|
|
39
38
|
MAX_HTTP_DOWNLOAD_SIZE = 50 * 1000 * 1000 * 1000 # 50 GB
|
|
40
39
|
|
|
41
40
|
# Constants for serialization
|
|
@@ -118,9 +117,9 @@ REPO_TYPES_MAPPING = {
|
|
|
118
117
|
}
|
|
119
118
|
|
|
120
119
|
DiscussionTypeFilter = Literal["all", "discussion", "pull_request"]
|
|
121
|
-
DISCUSSION_TYPES:
|
|
120
|
+
DISCUSSION_TYPES: tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter)
|
|
122
121
|
DiscussionStatusFilter = Literal["all", "open", "closed"]
|
|
123
|
-
DISCUSSION_STATUS:
|
|
122
|
+
DISCUSSION_STATUS: tuple[DiscussionStatusFilter, ...] = typing.get_args(DiscussionStatusFilter)
|
|
124
123
|
|
|
125
124
|
# Webhook subscription types
|
|
126
125
|
WEBHOOK_DOMAIN_T = Literal["repo", "discussions"]
|
|
@@ -135,7 +134,6 @@ HF_HOME = os.path.expandvars(
|
|
|
135
134
|
)
|
|
136
135
|
)
|
|
137
136
|
)
|
|
138
|
-
hf_cache_home = HF_HOME # for backward compatibility. TODO: remove this in 1.0.0
|
|
139
137
|
|
|
140
138
|
default_cache_path = os.path.join(HF_HOME, "hub")
|
|
141
139
|
default_assets_cache_path = os.path.join(HF_HOME, "assets")
|
|
@@ -164,6 +162,10 @@ HF_ASSETS_CACHE = os.path.expandvars(
|
|
|
164
162
|
|
|
165
163
|
HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
|
|
166
164
|
|
|
165
|
+
# File created to mark that the version check has been done.
|
|
166
|
+
# Check is performed once per 24 hours at most.
|
|
167
|
+
CHECK_FOR_UPDATE_DONE_PATH = os.path.join(HF_HOME, ".check_for_update_done")
|
|
168
|
+
|
|
167
169
|
# If set, log level will be set to DEBUG and all requests made to the Hub will be logged
|
|
168
170
|
# as curl commands for reproducibility.
|
|
169
171
|
HF_DEBUG = _is_true(os.environ.get("HF_DEBUG"))
|
|
@@ -212,18 +214,18 @@ HF_HUB_DISABLE_EXPERIMENTAL_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISA
|
|
|
212
214
|
# Disable sending the cached token by default in all HTTP requests to the Hub
|
|
213
215
|
HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN"))
|
|
214
216
|
|
|
215
|
-
|
|
216
|
-
# See:
|
|
217
|
-
# - https://pypi.org/project/hf-transfer/
|
|
218
|
-
# - https://github.com/huggingface/hf_transfer (private)
|
|
219
|
-
HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))
|
|
217
|
+
HF_XET_HIGH_PERFORMANCE: bool = _is_true(os.environ.get("HF_XET_HIGH_PERFORMANCE"))
|
|
220
218
|
|
|
219
|
+
# hf_transfer is not used anymore. Let's warn the user in case they set the env variable
|
|
220
|
+
if _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER")) and not HF_XET_HIGH_PERFORMANCE:
|
|
221
|
+
import warnings
|
|
221
222
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
223
|
+
warnings.warn(
|
|
224
|
+
"The `HF_HUB_ENABLE_HF_TRANSFER` environment variable is deprecated as 'hf_transfer' is not used anymore. "
|
|
225
|
+
"Please use `HF_XET_HIGH_PERFORMANCE` instead to enable high performance transfer with Xet. "
|
|
226
|
+
"Visit https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfxethighperformance for more details.",
|
|
227
|
+
DeprecationWarning,
|
|
228
|
+
)
|
|
227
229
|
|
|
228
230
|
# Used to override the etag timeout on a system level
|
|
229
231
|
HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
|
|
@@ -234,43 +236,6 @@ HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")
|
|
|
234
236
|
# Allows to add information about the requester in the user-agent (eg. partner name)
|
|
235
237
|
HF_HUB_USER_AGENT_ORIGIN: Optional[str] = os.environ.get("HF_HUB_USER_AGENT_ORIGIN")
|
|
236
238
|
|
|
237
|
-
# List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are
|
|
238
|
-
# deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by
|
|
239
|
-
# default. We still keep the full list of supported frameworks in case we want to scan all of them.
|
|
240
|
-
MAIN_INFERENCE_API_FRAMEWORKS = [
|
|
241
|
-
"diffusers",
|
|
242
|
-
"sentence-transformers",
|
|
243
|
-
"text-generation-inference",
|
|
244
|
-
"transformers",
|
|
245
|
-
]
|
|
246
|
-
|
|
247
|
-
ALL_INFERENCE_API_FRAMEWORKS = MAIN_INFERENCE_API_FRAMEWORKS + [
|
|
248
|
-
"adapter-transformers",
|
|
249
|
-
"allennlp",
|
|
250
|
-
"asteroid",
|
|
251
|
-
"bertopic",
|
|
252
|
-
"doctr",
|
|
253
|
-
"espnet",
|
|
254
|
-
"fairseq",
|
|
255
|
-
"fastai",
|
|
256
|
-
"fasttext",
|
|
257
|
-
"flair",
|
|
258
|
-
"k2",
|
|
259
|
-
"keras",
|
|
260
|
-
"mindspore",
|
|
261
|
-
"nemo",
|
|
262
|
-
"open_clip",
|
|
263
|
-
"paddlenlp",
|
|
264
|
-
"peft",
|
|
265
|
-
"pyannote-audio",
|
|
266
|
-
"sklearn",
|
|
267
|
-
"spacy",
|
|
268
|
-
"span-marker",
|
|
269
|
-
"speechbrain",
|
|
270
|
-
"stanza",
|
|
271
|
-
"timm",
|
|
272
|
-
]
|
|
273
|
-
|
|
274
239
|
# If OAuth didn't work after 2 redirects, there's likely a third-party cookie issue in the Space iframe view.
|
|
275
240
|
# In this case, we redirect the user to the non-iframe view.
|
|
276
241
|
OAUTH_MAX_REDIRECTS = 2
|
huggingface_hub/dataclasses.py
CHANGED
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
import inspect
|
|
2
|
-
from dataclasses import _MISSING_TYPE, MISSING, Field, field, fields
|
|
3
|
-
from functools import wraps
|
|
2
|
+
from dataclasses import _MISSING_TYPE, MISSING, Field, field, fields, make_dataclass
|
|
3
|
+
from functools import lru_cache, wraps
|
|
4
4
|
from typing import (
|
|
5
|
+
Annotated,
|
|
5
6
|
Any,
|
|
6
7
|
Callable,
|
|
7
|
-
Dict,
|
|
8
8
|
ForwardRef,
|
|
9
|
-
List,
|
|
10
9
|
Literal,
|
|
11
10
|
Optional,
|
|
12
|
-
Tuple,
|
|
13
11
|
Type,
|
|
14
12
|
TypeVar,
|
|
15
13
|
Union,
|
|
@@ -18,6 +16,19 @@ from typing import (
|
|
|
18
16
|
overload,
|
|
19
17
|
)
|
|
20
18
|
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
# Python 3.11+
|
|
22
|
+
from typing import NotRequired, Required # type: ignore
|
|
23
|
+
except ImportError:
|
|
24
|
+
try:
|
|
25
|
+
# In case typing_extensions is installed
|
|
26
|
+
from typing_extensions import NotRequired, Required # type: ignore
|
|
27
|
+
except ImportError:
|
|
28
|
+
# Fallback: create dummy types that will never match
|
|
29
|
+
Required = type("Required", (), {}) # type: ignore
|
|
30
|
+
NotRequired = type("NotRequired", (), {}) # type: ignore
|
|
31
|
+
|
|
21
32
|
from .errors import (
|
|
22
33
|
StrictDataclassClassValidationError,
|
|
23
34
|
StrictDataclassDefinitionError,
|
|
@@ -27,6 +38,9 @@ from .errors import (
|
|
|
27
38
|
|
|
28
39
|
Validator_T = Callable[[Any], None]
|
|
29
40
|
T = TypeVar("T")
|
|
41
|
+
TypedDictType = TypeVar("TypedDictType", bound=dict[str, Any])
|
|
42
|
+
|
|
43
|
+
_TYPED_DICT_DEFAULT_VALUE = object() # used as default value in TypedDict fields (to distinguish from None)
|
|
30
44
|
|
|
31
45
|
|
|
32
46
|
# The overload decorator helps type checkers understand the different return types
|
|
@@ -103,7 +117,7 @@ def strict(
|
|
|
103
117
|
)
|
|
104
118
|
|
|
105
119
|
# List and store validators
|
|
106
|
-
field_validators:
|
|
120
|
+
field_validators: dict[str, list[Validator_T]] = {}
|
|
107
121
|
for f in fields(cls): # type: ignore [arg-type]
|
|
108
122
|
validators = []
|
|
109
123
|
validators.append(_create_type_validator(f))
|
|
@@ -238,15 +252,101 @@ def strict(
|
|
|
238
252
|
return wrap(cls) if cls is not None else wrap
|
|
239
253
|
|
|
240
254
|
|
|
255
|
+
def validate_typed_dict(schema: type[TypedDictType], data: dict) -> None:
|
|
256
|
+
"""
|
|
257
|
+
Validate that a dictionary conforms to the types defined in a TypedDict class.
|
|
258
|
+
|
|
259
|
+
Under the hood, the typed dict is converted to a strict dataclass and validated using the `@strict` decorator.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
schema (`type[TypedDictType]`):
|
|
263
|
+
The TypedDict class defining the expected structure and types.
|
|
264
|
+
data (`dict`):
|
|
265
|
+
The dictionary to validate.
|
|
266
|
+
|
|
267
|
+
Raises:
|
|
268
|
+
`StrictDataclassFieldValidationError`:
|
|
269
|
+
If any field in the dictionary does not conform to the expected type.
|
|
270
|
+
|
|
271
|
+
Example:
|
|
272
|
+
```py
|
|
273
|
+
>>> from typing import Annotated, TypedDict
|
|
274
|
+
>>> from huggingface_hub.dataclasses import validate_typed_dict
|
|
275
|
+
|
|
276
|
+
>>> def positive_int(value: int):
|
|
277
|
+
... if not value >= 0:
|
|
278
|
+
... raise ValueError(f"Value must be positive, got {value}")
|
|
279
|
+
|
|
280
|
+
>>> class User(TypedDict):
|
|
281
|
+
... name: str
|
|
282
|
+
... age: Annotated[int, positive_int]
|
|
283
|
+
|
|
284
|
+
>>> # Valid data
|
|
285
|
+
>>> validate_typed_dict(User, {"name": "John", "age": 30})
|
|
286
|
+
|
|
287
|
+
>>> # Invalid type for age
|
|
288
|
+
>>> validate_typed_dict(User, {"name": "John", "age": "30"})
|
|
289
|
+
huggingface_hub.errors.StrictDataclassFieldValidationError: Validation error for field 'age':
|
|
290
|
+
TypeError: Field 'age' expected int, got str (value: '30')
|
|
291
|
+
|
|
292
|
+
>>> # Invalid value for age
|
|
293
|
+
>>> validate_typed_dict(User, {"name": "John", "age": -1})
|
|
294
|
+
huggingface_hub.errors.StrictDataclassFieldValidationError: Validation error for field 'age':
|
|
295
|
+
ValueError: Value must be positive, got -1
|
|
296
|
+
```
|
|
297
|
+
"""
|
|
298
|
+
# Convert typed dict to dataclass
|
|
299
|
+
strict_cls = _build_strict_cls_from_typed_dict(schema)
|
|
300
|
+
|
|
301
|
+
# Validate the data by instantiating the strict dataclass
|
|
302
|
+
strict_cls(**data) # will raise if validation fails
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
@lru_cache
|
|
306
|
+
def _build_strict_cls_from_typed_dict(schema: type[TypedDictType]) -> Type:
|
|
307
|
+
# Extract type hints from the TypedDict class
|
|
308
|
+
type_hints = {
|
|
309
|
+
# We do not use `get_type_hints` here to avoid evaluating ForwardRefs (which might fail).
|
|
310
|
+
# ForwardRefs are not validated by @strict anyway.
|
|
311
|
+
name: value if value is not None else type(None)
|
|
312
|
+
for name, value in schema.__dict__.get("__annotations__", {}).items()
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
# If the TypedDict is not total, wrap fields as NotRequired (unless explicitly Required or NotRequired)
|
|
316
|
+
if not getattr(schema, "__total__", True):
|
|
317
|
+
for key, value in type_hints.items():
|
|
318
|
+
origin = get_origin(value)
|
|
319
|
+
|
|
320
|
+
if origin is Annotated:
|
|
321
|
+
base, *meta = get_args(value)
|
|
322
|
+
if not _is_required_or_notrequired(base):
|
|
323
|
+
base = NotRequired[base]
|
|
324
|
+
type_hints[key] = Annotated[tuple([base] + list(meta))]
|
|
325
|
+
elif not _is_required_or_notrequired(value):
|
|
326
|
+
type_hints[key] = NotRequired[value]
|
|
327
|
+
|
|
328
|
+
# Convert type hints to dataclass fields
|
|
329
|
+
fields = []
|
|
330
|
+
for key, value in type_hints.items():
|
|
331
|
+
if get_origin(value) is Annotated:
|
|
332
|
+
base, *meta = get_args(value)
|
|
333
|
+
fields.append((key, base, field(default=_TYPED_DICT_DEFAULT_VALUE, metadata={"validator": meta[0]})))
|
|
334
|
+
else:
|
|
335
|
+
fields.append((key, value, field(default=_TYPED_DICT_DEFAULT_VALUE)))
|
|
336
|
+
|
|
337
|
+
# Create a strict dataclass from the TypedDict fields
|
|
338
|
+
return strict(make_dataclass(schema.__name__, fields))
|
|
339
|
+
|
|
340
|
+
|
|
241
341
|
def validated_field(
|
|
242
|
-
validator: Union[
|
|
342
|
+
validator: Union[list[Validator_T], Validator_T],
|
|
243
343
|
default: Union[Any, _MISSING_TYPE] = MISSING,
|
|
244
344
|
default_factory: Union[Callable[[], Any], _MISSING_TYPE] = MISSING,
|
|
245
345
|
init: bool = True,
|
|
246
346
|
repr: bool = True,
|
|
247
347
|
hash: Optional[bool] = None,
|
|
248
348
|
compare: bool = True,
|
|
249
|
-
metadata: Optional[
|
|
349
|
+
metadata: Optional[dict] = None,
|
|
250
350
|
**kwargs: Any,
|
|
251
351
|
) -> Any:
|
|
252
352
|
"""
|
|
@@ -255,7 +355,7 @@ def validated_field(
|
|
|
255
355
|
Useful to apply several checks to a field. If only applying one rule, check out the [`as_validated_field`] decorator.
|
|
256
356
|
|
|
257
357
|
Args:
|
|
258
|
-
validator (`Callable` or `
|
|
358
|
+
validator (`Callable` or `list[Callable]`):
|
|
259
359
|
A method that takes a value as input and raises ValueError/TypeError if the value is invalid.
|
|
260
360
|
Can be a list of validators to apply multiple checks.
|
|
261
361
|
**kwargs:
|
|
@@ -297,7 +397,7 @@ def as_validated_field(validator: Validator_T):
|
|
|
297
397
|
repr: bool = True,
|
|
298
398
|
hash: Optional[bool] = None,
|
|
299
399
|
compare: bool = True,
|
|
300
|
-
metadata: Optional[
|
|
400
|
+
metadata: Optional[dict] = None,
|
|
301
401
|
**kwargs: Any,
|
|
302
402
|
):
|
|
303
403
|
return validated_field(
|
|
@@ -328,11 +428,19 @@ def type_validator(name: str, value: Any, expected_type: Any) -> None:
|
|
|
328
428
|
_validate_simple_type(name, value, expected_type)
|
|
329
429
|
elif isinstance(expected_type, ForwardRef) or isinstance(expected_type, str):
|
|
330
430
|
return
|
|
431
|
+
elif origin is Required:
|
|
432
|
+
if value is _TYPED_DICT_DEFAULT_VALUE:
|
|
433
|
+
raise TypeError(f"Field '{name}' is required but missing.")
|
|
434
|
+
type_validator(name, value, args[0])
|
|
435
|
+
elif origin is NotRequired:
|
|
436
|
+
if value is _TYPED_DICT_DEFAULT_VALUE:
|
|
437
|
+
return
|
|
438
|
+
type_validator(name, value, args[0])
|
|
331
439
|
else:
|
|
332
440
|
raise TypeError(f"Unsupported type for field '{name}': {expected_type}")
|
|
333
441
|
|
|
334
442
|
|
|
335
|
-
def _validate_union(name: str, value: Any, args:
|
|
443
|
+
def _validate_union(name: str, value: Any, args: tuple[Any, ...]) -> None:
|
|
336
444
|
"""Validate that value matches one of the types in a Union."""
|
|
337
445
|
errors = []
|
|
338
446
|
for t in args:
|
|
@@ -347,14 +455,14 @@ def _validate_union(name: str, value: Any, args: Tuple[Any, ...]) -> None:
|
|
|
347
455
|
)
|
|
348
456
|
|
|
349
457
|
|
|
350
|
-
def _validate_literal(name: str, value: Any, args:
|
|
458
|
+
def _validate_literal(name: str, value: Any, args: tuple[Any, ...]) -> None:
|
|
351
459
|
"""Validate Literal type."""
|
|
352
460
|
if value not in args:
|
|
353
461
|
raise TypeError(f"Field '{name}' expected one of {args}, got {value}")
|
|
354
462
|
|
|
355
463
|
|
|
356
|
-
def _validate_list(name: str, value: Any, args:
|
|
357
|
-
"""Validate
|
|
464
|
+
def _validate_list(name: str, value: Any, args: tuple[Any, ...]) -> None:
|
|
465
|
+
"""Validate list[T] type."""
|
|
358
466
|
if not isinstance(value, list):
|
|
359
467
|
raise TypeError(f"Field '{name}' expected a list, got {type(value).__name__}")
|
|
360
468
|
|
|
@@ -367,8 +475,8 @@ def _validate_list(name: str, value: Any, args: Tuple[Any, ...]) -> None:
|
|
|
367
475
|
raise TypeError(f"Invalid item at index {i} in list '{name}'") from e
|
|
368
476
|
|
|
369
477
|
|
|
370
|
-
def _validate_dict(name: str, value: Any, args:
|
|
371
|
-
"""Validate
|
|
478
|
+
def _validate_dict(name: str, value: Any, args: tuple[Any, ...]) -> None:
|
|
479
|
+
"""Validate dict[K, V] type."""
|
|
372
480
|
if not isinstance(value, dict):
|
|
373
481
|
raise TypeError(f"Field '{name}' expected a dict, got {type(value).__name__}")
|
|
374
482
|
|
|
@@ -382,19 +490,19 @@ def _validate_dict(name: str, value: Any, args: Tuple[Any, ...]) -> None:
|
|
|
382
490
|
raise TypeError(f"Invalid key or value in dict '{name}'") from e
|
|
383
491
|
|
|
384
492
|
|
|
385
|
-
def _validate_tuple(name: str, value: Any, args:
|
|
493
|
+
def _validate_tuple(name: str, value: Any, args: tuple[Any, ...]) -> None:
|
|
386
494
|
"""Validate Tuple type."""
|
|
387
495
|
if not isinstance(value, tuple):
|
|
388
496
|
raise TypeError(f"Field '{name}' expected a tuple, got {type(value).__name__}")
|
|
389
497
|
|
|
390
|
-
# Handle variable-length tuples:
|
|
498
|
+
# Handle variable-length tuples: tuple[T, ...]
|
|
391
499
|
if len(args) == 2 and args[1] is Ellipsis:
|
|
392
500
|
for i, item in enumerate(value):
|
|
393
501
|
try:
|
|
394
502
|
type_validator(f"{name}[{i}]", item, args[0])
|
|
395
503
|
except TypeError as e:
|
|
396
504
|
raise TypeError(f"Invalid item at index {i} in tuple '{name}'") from e
|
|
397
|
-
# Handle fixed-length tuples:
|
|
505
|
+
# Handle fixed-length tuples: tuple[T1, T2, ...]
|
|
398
506
|
elif len(args) != len(value):
|
|
399
507
|
raise TypeError(f"Field '{name}' expected a tuple of length {len(args)}, got {len(value)}")
|
|
400
508
|
else:
|
|
@@ -405,8 +513,8 @@ def _validate_tuple(name: str, value: Any, args: Tuple[Any, ...]) -> None:
|
|
|
405
513
|
raise TypeError(f"Invalid item at index {i} in tuple '{name}'") from e
|
|
406
514
|
|
|
407
515
|
|
|
408
|
-
def _validate_set(name: str, value: Any, args:
|
|
409
|
-
"""Validate
|
|
516
|
+
def _validate_set(name: str, value: Any, args: tuple[Any, ...]) -> None:
|
|
517
|
+
"""Validate set[T] type."""
|
|
410
518
|
if not isinstance(value, set):
|
|
411
519
|
raise TypeError(f"Field '{name}' expected a set, got {type(value).__name__}")
|
|
412
520
|
|
|
@@ -464,6 +572,11 @@ def _is_validator(validator: Any) -> bool:
|
|
|
464
572
|
return True
|
|
465
573
|
|
|
466
574
|
|
|
575
|
+
def _is_required_or_notrequired(type_hint: Any) -> bool:
|
|
576
|
+
"""Helper to check if a type is Required/NotRequired."""
|
|
577
|
+
return type_hint in (Required, NotRequired) or (get_origin(type_hint) in (Required, NotRequired))
|
|
578
|
+
|
|
579
|
+
|
|
467
580
|
_BASIC_TYPE_VALIDATORS = {
|
|
468
581
|
Union: _validate_union,
|
|
469
582
|
Literal: _validate_literal,
|
|
@@ -476,6 +589,7 @@ _BASIC_TYPE_VALIDATORS = {
|
|
|
476
589
|
|
|
477
590
|
__all__ = [
|
|
478
591
|
"strict",
|
|
592
|
+
"validate_typed_dict",
|
|
479
593
|
"validated_field",
|
|
480
594
|
"Validator_T",
|
|
481
595
|
"StrictDataclassClassValidationError",
|