huggingface-hub 0.25.2__py3-none-any.whl → 0.26.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +45 -11
- huggingface_hub/_login.py +172 -33
- huggingface_hub/commands/user.py +125 -9
- huggingface_hub/constants.py +1 -1
- huggingface_hub/errors.py +6 -9
- huggingface_hub/file_download.py +2 -372
- huggingface_hub/hf_api.py +170 -13
- huggingface_hub/hf_file_system.py +3 -3
- huggingface_hub/hub_mixin.py +2 -1
- huggingface_hub/inference/_client.py +500 -145
- huggingface_hub/inference/_common.py +42 -4
- huggingface_hub/inference/_generated/_async_client.py +499 -144
- huggingface_hub/inference/_generated/types/__init__.py +37 -7
- huggingface_hub/inference/_generated/types/audio_classification.py +8 -5
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +9 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +23 -4
- huggingface_hub/inference/_generated/types/image_classification.py +8 -5
- huggingface_hub/inference/_generated/types/image_segmentation.py +9 -7
- huggingface_hub/inference/_generated/types/image_to_image.py +7 -5
- huggingface_hub/inference/_generated/types/image_to_text.py +4 -4
- huggingface_hub/inference/_generated/types/object_detection.py +11 -5
- huggingface_hub/inference/_generated/types/summarization.py +11 -13
- huggingface_hub/inference/_generated/types/text_classification.py +10 -5
- huggingface_hub/inference/_generated/types/text_generation.py +1 -0
- huggingface_hub/inference/_generated/types/text_to_audio.py +2 -2
- huggingface_hub/inference/_generated/types/text_to_image.py +9 -7
- huggingface_hub/inference/_generated/types/text_to_speech.py +107 -0
- huggingface_hub/inference/_generated/types/translation.py +17 -11
- huggingface_hub/inference/_generated/types/video_classification.py +2 -2
- huggingface_hub/repocard.py +2 -1
- huggingface_hub/repocard_data.py +10 -2
- huggingface_hub/serialization/_torch.py +7 -4
- huggingface_hub/utils/__init__.py +4 -20
- huggingface_hub/utils/{_token.py → _auth.py} +86 -3
- huggingface_hub/utils/_headers.py +1 -1
- huggingface_hub/utils/_hf_folder.py +1 -1
- huggingface_hub/utils/_http.py +10 -4
- huggingface_hub/utils/_runtime.py +1 -10
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/METADATA +12 -12
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/RECORD +44 -44
- huggingface_hub/inference/_templating.py +0 -102
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.25.2.dist-info → huggingface_hub-0.26.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/commands/user.py
CHANGED
|
@@ -11,32 +11,64 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
"""Contains commands to authenticate to the Hugging Face Hub and interact with your repositories.
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
# login and save token locally.
|
|
18
|
+
huggingface-cli login --token=hf_*** --add-to-git-credential
|
|
19
|
+
|
|
20
|
+
# switch between tokens
|
|
21
|
+
huggingface-cli auth switch
|
|
22
|
+
|
|
23
|
+
# list all tokens
|
|
24
|
+
huggingface-cli auth list
|
|
25
|
+
|
|
26
|
+
# logout from a specific token, if no token-name is provided, all tokens will be deleted from your machine.
|
|
27
|
+
huggingface-cli logout --token-name=your_token_name
|
|
28
|
+
|
|
29
|
+
# find out which huggingface.co account you are logged in as
|
|
30
|
+
huggingface-cli whoami
|
|
31
|
+
|
|
32
|
+
# create a new dataset repo on the Hub
|
|
33
|
+
huggingface-cli repo create mydataset --type=dataset
|
|
34
|
+
|
|
35
|
+
"""
|
|
36
|
+
|
|
14
37
|
import subprocess
|
|
15
38
|
from argparse import _SubParsersAction
|
|
39
|
+
from typing import Optional
|
|
16
40
|
|
|
17
41
|
from requests.exceptions import HTTPError
|
|
18
42
|
|
|
19
43
|
from huggingface_hub.commands import BaseHuggingfaceCLICommand
|
|
20
|
-
from huggingface_hub.constants import (
|
|
21
|
-
ENDPOINT,
|
|
22
|
-
REPO_TYPES,
|
|
23
|
-
REPO_TYPES_URL_PREFIXES,
|
|
24
|
-
SPACES_SDK_TYPES,
|
|
25
|
-
)
|
|
44
|
+
from huggingface_hub.constants import ENDPOINT, REPO_TYPES, REPO_TYPES_URL_PREFIXES, SPACES_SDK_TYPES
|
|
26
45
|
from huggingface_hub.hf_api import HfApi
|
|
27
46
|
|
|
28
47
|
from .._login import ( # noqa: F401 # for backward compatibility
|
|
29
48
|
NOTEBOOK_LOGIN_PASSWORD_HTML,
|
|
30
49
|
NOTEBOOK_LOGIN_TOKEN_HTML_END,
|
|
31
50
|
NOTEBOOK_LOGIN_TOKEN_HTML_START,
|
|
51
|
+
auth_list,
|
|
52
|
+
auth_switch,
|
|
32
53
|
login,
|
|
33
54
|
logout,
|
|
34
55
|
notebook_login,
|
|
35
56
|
)
|
|
36
|
-
from ..utils import get_token
|
|
57
|
+
from ..utils import get_stored_tokens, get_token, logging
|
|
37
58
|
from ._cli_utils import ANSI
|
|
38
59
|
|
|
39
60
|
|
|
61
|
+
logger = logging.get_logger(__name__)
|
|
62
|
+
|
|
63
|
+
try:
|
|
64
|
+
from InquirerPy import inquirer
|
|
65
|
+
from InquirerPy.base.control import Choice
|
|
66
|
+
|
|
67
|
+
_inquirer_py_available = True
|
|
68
|
+
except ImportError:
|
|
69
|
+
_inquirer_py_available = False
|
|
70
|
+
|
|
71
|
+
|
|
40
72
|
class UserCommands(BaseHuggingfaceCLICommand):
|
|
41
73
|
@staticmethod
|
|
42
74
|
def register_subcommand(parser: _SubParsersAction):
|
|
@@ -54,9 +86,31 @@ class UserCommands(BaseHuggingfaceCLICommand):
|
|
|
54
86
|
login_parser.set_defaults(func=lambda args: LoginCommand(args))
|
|
55
87
|
whoami_parser = parser.add_parser("whoami", help="Find out which huggingface.co account you are logged in as.")
|
|
56
88
|
whoami_parser.set_defaults(func=lambda args: WhoamiCommand(args))
|
|
89
|
+
|
|
57
90
|
logout_parser = parser.add_parser("logout", help="Log out")
|
|
91
|
+
logout_parser.add_argument(
|
|
92
|
+
"--token-name",
|
|
93
|
+
type=str,
|
|
94
|
+
help="Optional: Name of the access token to log out from.",
|
|
95
|
+
)
|
|
58
96
|
logout_parser.set_defaults(func=lambda args: LogoutCommand(args))
|
|
59
97
|
|
|
98
|
+
auth_parser = parser.add_parser("auth", help="Other authentication related commands")
|
|
99
|
+
auth_subparsers = auth_parser.add_subparsers(help="Authentication subcommands")
|
|
100
|
+
auth_switch_parser = auth_subparsers.add_parser("switch", help="Switch between access tokens")
|
|
101
|
+
auth_switch_parser.add_argument(
|
|
102
|
+
"--token-name",
|
|
103
|
+
type=str,
|
|
104
|
+
help="Optional: Name of the access token to switch to.",
|
|
105
|
+
)
|
|
106
|
+
auth_switch_parser.add_argument(
|
|
107
|
+
"--add-to-git-credential",
|
|
108
|
+
action="store_true",
|
|
109
|
+
help="Optional: Save token to git credential helper.",
|
|
110
|
+
)
|
|
111
|
+
auth_switch_parser.set_defaults(func=lambda args: AuthSwitchCommand(args))
|
|
112
|
+
auth_list_parser = auth_subparsers.add_parser("list", help="List all stored access tokens")
|
|
113
|
+
auth_list_parser.set_defaults(func=lambda args: AuthListCommand(args))
|
|
60
114
|
# new system: git-based repo system
|
|
61
115
|
repo_parser = parser.add_parser("repo", help="{create} Commands to interact with your huggingface.co repos.")
|
|
62
116
|
repo_subparsers = repo_parser.add_subparsers(help="huggingface.co repos related commands")
|
|
@@ -95,12 +149,74 @@ class BaseUserCommand:
|
|
|
95
149
|
|
|
96
150
|
class LoginCommand(BaseUserCommand):
|
|
97
151
|
def run(self):
|
|
98
|
-
|
|
152
|
+
logging.set_verbosity_info()
|
|
153
|
+
login(
|
|
154
|
+
token=self.args.token,
|
|
155
|
+
add_to_git_credential=self.args.add_to_git_credential,
|
|
156
|
+
)
|
|
99
157
|
|
|
100
158
|
|
|
101
159
|
class LogoutCommand(BaseUserCommand):
|
|
102
160
|
def run(self):
|
|
103
|
-
|
|
161
|
+
logging.set_verbosity_info()
|
|
162
|
+
logout(token_name=self.args.token_name)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class AuthSwitchCommand(BaseUserCommand):
|
|
166
|
+
def run(self):
|
|
167
|
+
logging.set_verbosity_info()
|
|
168
|
+
token_name = self.args.token_name
|
|
169
|
+
if token_name is None:
|
|
170
|
+
token_name = self._select_token_name()
|
|
171
|
+
|
|
172
|
+
if token_name is None:
|
|
173
|
+
print("No token name provided. Aborting.")
|
|
174
|
+
exit()
|
|
175
|
+
auth_switch(token_name, add_to_git_credential=self.args.add_to_git_credential)
|
|
176
|
+
|
|
177
|
+
def _select_token_name(self) -> Optional[str]:
|
|
178
|
+
token_names = list(get_stored_tokens().keys())
|
|
179
|
+
|
|
180
|
+
if not token_names:
|
|
181
|
+
logger.error("No stored tokens found. Please login first.")
|
|
182
|
+
return None
|
|
183
|
+
|
|
184
|
+
if _inquirer_py_available:
|
|
185
|
+
return self._select_token_name_tui(token_names)
|
|
186
|
+
# if inquirer is not available, use a simpler terminal UI
|
|
187
|
+
print("Available stored tokens:")
|
|
188
|
+
for i, token_name in enumerate(token_names, 1):
|
|
189
|
+
print(f"{i}. {token_name}")
|
|
190
|
+
while True:
|
|
191
|
+
try:
|
|
192
|
+
choice = input("Enter the number of the token to switch to (or 'q' to quit): ")
|
|
193
|
+
if choice.lower() == "q":
|
|
194
|
+
return None
|
|
195
|
+
index = int(choice) - 1
|
|
196
|
+
if 0 <= index < len(token_names):
|
|
197
|
+
return token_names[index]
|
|
198
|
+
else:
|
|
199
|
+
print("Invalid selection. Please try again.")
|
|
200
|
+
except ValueError:
|
|
201
|
+
print("Invalid input. Please enter a number or 'q' to quit.")
|
|
202
|
+
|
|
203
|
+
def _select_token_name_tui(self, token_names: list[str]) -> Optional[str]:
|
|
204
|
+
choices = [Choice(token_name, name=token_name) for token_name in token_names]
|
|
205
|
+
try:
|
|
206
|
+
return inquirer.select(
|
|
207
|
+
message="Select a token to switch to:",
|
|
208
|
+
choices=choices,
|
|
209
|
+
default=None,
|
|
210
|
+
).execute()
|
|
211
|
+
except KeyboardInterrupt:
|
|
212
|
+
logger.info("Token selection cancelled.")
|
|
213
|
+
return None
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class AuthListCommand(BaseUserCommand):
|
|
217
|
+
def run(self):
|
|
218
|
+
logging.set_verbosity_info()
|
|
219
|
+
auth_list()
|
|
104
220
|
|
|
105
221
|
|
|
106
222
|
class WhoamiCommand(BaseUserCommand):
|
huggingface_hub/constants.py
CHANGED
|
@@ -140,7 +140,7 @@ HF_HUB_DISABLE_TELEMETRY = (
|
|
|
140
140
|
# See https://github.com/huggingface/huggingface_hub/issues/1232
|
|
141
141
|
_OLD_HF_TOKEN_PATH = os.path.expanduser("~/.huggingface/token")
|
|
142
142
|
HF_TOKEN_PATH = os.environ.get("HF_TOKEN_PATH", os.path.join(HF_HOME, "token"))
|
|
143
|
-
|
|
143
|
+
HF_STORED_TOKENS_PATH = os.path.join(os.path.dirname(HF_TOKEN_PATH), "stored_tokens")
|
|
144
144
|
|
|
145
145
|
if _staging_mode:
|
|
146
146
|
# In staging mode, we use a different cache to ensure we don't mix up production and staging data or tokens
|
huggingface_hub/errors.py
CHANGED
|
@@ -45,7 +45,7 @@ class HfHubHTTPError(HTTPError):
|
|
|
45
45
|
sent back by the server, it will be added to the error message.
|
|
46
46
|
|
|
47
47
|
Added details:
|
|
48
|
-
- Request id from "X-Request-Id" header if exists.
|
|
48
|
+
- Request id from "X-Request-Id" header if exists. If not, fallback to "X-Amzn-Trace-Id" header if exists.
|
|
49
49
|
- Server error message from the header "X-Error-Message".
|
|
50
50
|
- Server error message if we can found one in the response body.
|
|
51
51
|
|
|
@@ -68,7 +68,11 @@ class HfHubHTTPError(HTTPError):
|
|
|
68
68
|
"""
|
|
69
69
|
|
|
70
70
|
def __init__(self, message: str, response: Optional[Response] = None, *, server_message: Optional[str] = None):
|
|
71
|
-
self.request_id = response.headers.get("x-request-id") if response is not None else None
|
|
71
|
+
self.request_id = (
|
|
72
|
+
response.headers.get("x-request-id") or response.headers.get("X-Amzn-Trace-Id")
|
|
73
|
+
if response is not None
|
|
74
|
+
else None
|
|
75
|
+
)
|
|
72
76
|
self.server_message = server_message
|
|
73
77
|
|
|
74
78
|
super().__init__(
|
|
@@ -116,13 +120,6 @@ class NotASafetensorsRepoError(Exception):
|
|
|
116
120
|
"""
|
|
117
121
|
|
|
118
122
|
|
|
119
|
-
# TEMPLATING ERRORS
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
class TemplateError(Exception):
|
|
123
|
-
"""Any error raised while trying to fetch or render a chat template."""
|
|
124
|
-
|
|
125
|
-
|
|
126
123
|
# TEXT GENERATION ERRORS
|
|
127
124
|
|
|
128
125
|
|
huggingface_hub/file_download.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import copy
|
|
3
3
|
import errno
|
|
4
|
-
import fnmatch
|
|
5
4
|
import inspect
|
|
6
|
-
import json
|
|
7
5
|
import os
|
|
8
6
|
import re
|
|
9
7
|
import shutil
|
|
@@ -22,11 +20,7 @@ from . import (
|
|
|
22
20
|
__version__, # noqa: F401 # for backward compatibility
|
|
23
21
|
constants,
|
|
24
22
|
)
|
|
25
|
-
from ._local_folder import (
|
|
26
|
-
get_local_download_paths,
|
|
27
|
-
read_download_metadata,
|
|
28
|
-
write_download_metadata,
|
|
29
|
-
)
|
|
23
|
+
from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata
|
|
30
24
|
from .constants import (
|
|
31
25
|
HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 # for backward compatibility
|
|
32
26
|
HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility
|
|
@@ -65,10 +59,8 @@ from .utils import (
|
|
|
65
59
|
tqdm,
|
|
66
60
|
validate_hf_hub_args,
|
|
67
61
|
)
|
|
68
|
-
from .utils._deprecation import _deprecate_arguments, _deprecate_method
|
|
69
62
|
from .utils._runtime import _PY_VERSION # noqa: F401 # for backward compatibility
|
|
70
63
|
from .utils._typing import HTTP_METHOD_T
|
|
71
|
-
from .utils.insecure_hashlib import sha256
|
|
72
64
|
from .utils.sha import sha_fileobj
|
|
73
65
|
|
|
74
66
|
|
|
@@ -262,85 +254,6 @@ def hf_hub_url(
|
|
|
262
254
|
return url
|
|
263
255
|
|
|
264
256
|
|
|
265
|
-
@_deprecate_method(version="0.26", message="Use `hf_hub_download` to benefit from the new cache layout.")
|
|
266
|
-
def url_to_filename(url: str, etag: Optional[str] = None) -> str:
|
|
267
|
-
"""Generate a local filename from a url.
|
|
268
|
-
|
|
269
|
-
Convert `url` into a hashed filename in a reproducible way. If `etag` is
|
|
270
|
-
specified, append its hash to the url's, delimited by a period. If the url
|
|
271
|
-
ends with .h5 (Keras HDF5 weights) adds '.h5' to the name so that TF 2.0 can
|
|
272
|
-
identify it as a HDF5 file (see
|
|
273
|
-
https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1380)
|
|
274
|
-
|
|
275
|
-
Args:
|
|
276
|
-
url (`str`):
|
|
277
|
-
The address to the file.
|
|
278
|
-
etag (`str`, *optional*):
|
|
279
|
-
The ETag of the file.
|
|
280
|
-
|
|
281
|
-
Returns:
|
|
282
|
-
The generated filename.
|
|
283
|
-
"""
|
|
284
|
-
url_bytes = url.encode("utf-8")
|
|
285
|
-
filename = sha256(url_bytes).hexdigest()
|
|
286
|
-
|
|
287
|
-
if etag:
|
|
288
|
-
etag_bytes = etag.encode("utf-8")
|
|
289
|
-
filename += "." + sha256(etag_bytes).hexdigest()
|
|
290
|
-
|
|
291
|
-
if url.endswith(".h5"):
|
|
292
|
-
filename += ".h5"
|
|
293
|
-
|
|
294
|
-
return filename
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
@_deprecate_method(version="0.26", message="Use `hf_hub_url` instead.")
|
|
298
|
-
def filename_to_url(
|
|
299
|
-
filename,
|
|
300
|
-
cache_dir: Optional[str] = None,
|
|
301
|
-
legacy_cache_layout: bool = False,
|
|
302
|
-
) -> Tuple[str, str]:
|
|
303
|
-
"""
|
|
304
|
-
Return the url and etag (which may be `None`) stored for `filename`. Raise
|
|
305
|
-
`EnvironmentError` if `filename` or its stored metadata do not exist.
|
|
306
|
-
|
|
307
|
-
Args:
|
|
308
|
-
filename (`str`):
|
|
309
|
-
The name of the file
|
|
310
|
-
cache_dir (`str`, *optional*):
|
|
311
|
-
The cache directory to use instead of the default one.
|
|
312
|
-
legacy_cache_layout (`bool`, *optional*, defaults to `False`):
|
|
313
|
-
If `True`, uses the legacy file cache layout i.e. just call `hf_hub_url`
|
|
314
|
-
then `cached_download`. This is deprecated as the new cache layout is
|
|
315
|
-
more powerful.
|
|
316
|
-
"""
|
|
317
|
-
if not legacy_cache_layout:
|
|
318
|
-
warnings.warn(
|
|
319
|
-
"`filename_to_url` uses the legacy way cache file layout",
|
|
320
|
-
FutureWarning,
|
|
321
|
-
)
|
|
322
|
-
|
|
323
|
-
if cache_dir is None:
|
|
324
|
-
cache_dir = constants.HF_HUB_CACHE
|
|
325
|
-
if isinstance(cache_dir, Path):
|
|
326
|
-
cache_dir = str(cache_dir)
|
|
327
|
-
|
|
328
|
-
cache_path = os.path.join(cache_dir, filename)
|
|
329
|
-
if not os.path.exists(cache_path):
|
|
330
|
-
raise EnvironmentError(f"file {cache_path} not found")
|
|
331
|
-
|
|
332
|
-
meta_path = cache_path + ".json"
|
|
333
|
-
if not os.path.exists(meta_path):
|
|
334
|
-
raise EnvironmentError(f"file {meta_path} not found")
|
|
335
|
-
|
|
336
|
-
with open(meta_path, encoding="utf-8") as meta_file:
|
|
337
|
-
metadata = json.load(meta_file)
|
|
338
|
-
url = metadata["url"]
|
|
339
|
-
etag = metadata["etag"]
|
|
340
|
-
|
|
341
|
-
return url, etag
|
|
342
|
-
|
|
343
|
-
|
|
344
257
|
def _request_wrapper(
|
|
345
258
|
method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
|
|
346
259
|
) -> requests.Response:
|
|
@@ -574,249 +487,6 @@ def http_get(
|
|
|
574
487
|
)
|
|
575
488
|
|
|
576
489
|
|
|
577
|
-
@validate_hf_hub_args
|
|
578
|
-
@_deprecate_method(version="0.26", message="Use `hf_hub_download` instead.")
|
|
579
|
-
def cached_download(
|
|
580
|
-
url: str,
|
|
581
|
-
*,
|
|
582
|
-
library_name: Optional[str] = None,
|
|
583
|
-
library_version: Optional[str] = None,
|
|
584
|
-
cache_dir: Union[str, Path, None] = None,
|
|
585
|
-
user_agent: Union[Dict, str, None] = None,
|
|
586
|
-
force_download: bool = False,
|
|
587
|
-
force_filename: Optional[str] = None,
|
|
588
|
-
proxies: Optional[Dict] = None,
|
|
589
|
-
etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
|
|
590
|
-
resume_download: Optional[bool] = None,
|
|
591
|
-
token: Union[bool, str, None] = None,
|
|
592
|
-
local_files_only: bool = False,
|
|
593
|
-
legacy_cache_layout: bool = False,
|
|
594
|
-
) -> str:
|
|
595
|
-
"""
|
|
596
|
-
Download from a given URL and cache it if it's not already present in the
|
|
597
|
-
local cache.
|
|
598
|
-
|
|
599
|
-
Given a URL, this function looks for the corresponding file in the local
|
|
600
|
-
cache. If it's not there, download it. Then return the path to the cached
|
|
601
|
-
file.
|
|
602
|
-
|
|
603
|
-
Will raise errors tailored to the Hugging Face Hub.
|
|
604
|
-
|
|
605
|
-
Args:
|
|
606
|
-
url (`str`):
|
|
607
|
-
The path to the file to be downloaded.
|
|
608
|
-
library_name (`str`, *optional*):
|
|
609
|
-
The name of the library to which the object corresponds.
|
|
610
|
-
library_version (`str`, *optional*):
|
|
611
|
-
The version of the library.
|
|
612
|
-
cache_dir (`str`, `Path`, *optional*):
|
|
613
|
-
Path to the folder where cached files are stored.
|
|
614
|
-
user_agent (`dict`, `str`, *optional*):
|
|
615
|
-
The user-agent info in the form of a dictionary or a string.
|
|
616
|
-
force_download (`bool`, *optional*, defaults to `False`):
|
|
617
|
-
Whether the file should be downloaded even if it already exists in
|
|
618
|
-
the local cache.
|
|
619
|
-
force_filename (`str`, *optional*):
|
|
620
|
-
Use this name instead of a generated file name.
|
|
621
|
-
proxies (`dict`, *optional*):
|
|
622
|
-
Dictionary mapping protocol to the URL of the proxy passed to
|
|
623
|
-
`requests.request`.
|
|
624
|
-
etag_timeout (`float`, *optional* defaults to `10`):
|
|
625
|
-
When fetching ETag, how many seconds to wait for the server to send
|
|
626
|
-
data before giving up which is passed to `requests.request`.
|
|
627
|
-
token (`bool`, `str`, *optional*):
|
|
628
|
-
A token to be used for the download.
|
|
629
|
-
- If `True`, the token is read from the HuggingFace config
|
|
630
|
-
folder.
|
|
631
|
-
- If a string, it's used as the authentication token.
|
|
632
|
-
local_files_only (`bool`, *optional*, defaults to `False`):
|
|
633
|
-
If `True`, avoid downloading the file and return the path to the
|
|
634
|
-
local cached file if it exists.
|
|
635
|
-
legacy_cache_layout (`bool`, *optional*, defaults to `False`):
|
|
636
|
-
Set this parameter to `True` to mention that you'd like to continue
|
|
637
|
-
the old cache layout. Putting this to `True` manually will not raise
|
|
638
|
-
any warning when using `cached_download`. We recommend using
|
|
639
|
-
`hf_hub_download` to take advantage of the new cache.
|
|
640
|
-
|
|
641
|
-
Returns:
|
|
642
|
-
Local path (string) of file or if networking is off, last version of
|
|
643
|
-
file cached on disk.
|
|
644
|
-
|
|
645
|
-
<Tip>
|
|
646
|
-
|
|
647
|
-
Raises the following errors:
|
|
648
|
-
|
|
649
|
-
- [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
|
|
650
|
-
if `token=True` and the token cannot be found.
|
|
651
|
-
- [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
|
|
652
|
-
if ETag cannot be determined.
|
|
653
|
-
- [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
|
|
654
|
-
if some parameter value is invalid
|
|
655
|
-
- [`~utils.RepositoryNotFoundError`]
|
|
656
|
-
If the repository to download from cannot be found. This may be because it doesn't exist,
|
|
657
|
-
or because it is set to `private` and you do not have access.
|
|
658
|
-
- [`~utils.RevisionNotFoundError`]
|
|
659
|
-
If the revision to download from cannot be found.
|
|
660
|
-
- [`~utils.EntryNotFoundError`]
|
|
661
|
-
If the file to download cannot be found.
|
|
662
|
-
- [`~utils.LocalEntryNotFoundError`]
|
|
663
|
-
If network is disabled or unavailable and file is not found in cache.
|
|
664
|
-
|
|
665
|
-
</Tip>
|
|
666
|
-
"""
|
|
667
|
-
if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT:
|
|
668
|
-
# Respect environment variable above user value
|
|
669
|
-
etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
|
|
670
|
-
|
|
671
|
-
if not legacy_cache_layout:
|
|
672
|
-
warnings.warn(
|
|
673
|
-
"'cached_download' is the legacy way to download files from the HF hub, please consider upgrading to"
|
|
674
|
-
" 'hf_hub_download'",
|
|
675
|
-
FutureWarning,
|
|
676
|
-
)
|
|
677
|
-
if resume_download is not None:
|
|
678
|
-
warnings.warn(
|
|
679
|
-
"`resume_download` is deprecated and will be removed in version 1.0.0. "
|
|
680
|
-
"Downloads always resume when possible. "
|
|
681
|
-
"If you want to force a new download, use `force_download=True`.",
|
|
682
|
-
FutureWarning,
|
|
683
|
-
)
|
|
684
|
-
|
|
685
|
-
if cache_dir is None:
|
|
686
|
-
cache_dir = constants.HF_HUB_CACHE
|
|
687
|
-
if isinstance(cache_dir, Path):
|
|
688
|
-
cache_dir = str(cache_dir)
|
|
689
|
-
|
|
690
|
-
os.makedirs(cache_dir, exist_ok=True)
|
|
691
|
-
|
|
692
|
-
headers = build_hf_headers(
|
|
693
|
-
token=token,
|
|
694
|
-
library_name=library_name,
|
|
695
|
-
library_version=library_version,
|
|
696
|
-
user_agent=user_agent,
|
|
697
|
-
)
|
|
698
|
-
|
|
699
|
-
url_to_download = url
|
|
700
|
-
etag = None
|
|
701
|
-
expected_size = None
|
|
702
|
-
if not local_files_only:
|
|
703
|
-
try:
|
|
704
|
-
# Temporary header: we want the full (decompressed) content size returned to be able to check the
|
|
705
|
-
# downloaded file size
|
|
706
|
-
headers["Accept-Encoding"] = "identity"
|
|
707
|
-
r = _request_wrapper(
|
|
708
|
-
method="HEAD",
|
|
709
|
-
url=url,
|
|
710
|
-
headers=headers,
|
|
711
|
-
allow_redirects=False,
|
|
712
|
-
follow_relative_redirects=True,
|
|
713
|
-
proxies=proxies,
|
|
714
|
-
timeout=etag_timeout,
|
|
715
|
-
)
|
|
716
|
-
headers.pop("Accept-Encoding", None)
|
|
717
|
-
hf_raise_for_status(r)
|
|
718
|
-
etag = r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")
|
|
719
|
-
# We favor a custom header indicating the etag of the linked resource, and
|
|
720
|
-
# we fallback to the regular etag header.
|
|
721
|
-
# If we don't have any of those, raise an error.
|
|
722
|
-
if etag is None:
|
|
723
|
-
raise FileMetadataError(
|
|
724
|
-
"Distant resource does not have an ETag, we won't be able to reliably ensure reproducibility."
|
|
725
|
-
)
|
|
726
|
-
# We get the expected size of the file, to check the download went well.
|
|
727
|
-
expected_size = _int_or_none(r.headers.get("Content-Length"))
|
|
728
|
-
# In case of a redirect, save an extra redirect on the request.get call,
|
|
729
|
-
# and ensure we download the exact atomic version even if it changed
|
|
730
|
-
# between the HEAD and the GET (unlikely, but hey).
|
|
731
|
-
# Useful for lfs blobs that are stored on a CDN.
|
|
732
|
-
if 300 <= r.status_code <= 399:
|
|
733
|
-
url_to_download = r.headers["Location"]
|
|
734
|
-
headers.pop("authorization", None)
|
|
735
|
-
expected_size = None # redirected -> can't know the expected size
|
|
736
|
-
except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
|
|
737
|
-
# Actually raise for those subclasses of ConnectionError
|
|
738
|
-
raise
|
|
739
|
-
except (
|
|
740
|
-
requests.exceptions.ConnectionError,
|
|
741
|
-
requests.exceptions.Timeout,
|
|
742
|
-
OfflineModeIsEnabled,
|
|
743
|
-
):
|
|
744
|
-
# Otherwise, our Internet connection is down.
|
|
745
|
-
# etag is None
|
|
746
|
-
pass
|
|
747
|
-
|
|
748
|
-
filename = force_filename if force_filename is not None else url_to_filename(url, etag)
|
|
749
|
-
|
|
750
|
-
# get cache path to put the file
|
|
751
|
-
cache_path = os.path.join(cache_dir, filename)
|
|
752
|
-
|
|
753
|
-
# etag is None == we don't have a connection or we passed local_files_only.
|
|
754
|
-
# try to get the last downloaded one
|
|
755
|
-
if etag is None:
|
|
756
|
-
if os.path.exists(cache_path) and not force_download:
|
|
757
|
-
return cache_path
|
|
758
|
-
else:
|
|
759
|
-
matching_files = [
|
|
760
|
-
file
|
|
761
|
-
for file in fnmatch.filter(os.listdir(cache_dir), filename.split(".")[0] + ".*")
|
|
762
|
-
if not file.endswith(".json") and not file.endswith(".lock")
|
|
763
|
-
]
|
|
764
|
-
if len(matching_files) > 0 and not force_download and force_filename is None:
|
|
765
|
-
return os.path.join(cache_dir, matching_files[-1])
|
|
766
|
-
else:
|
|
767
|
-
# If files cannot be found and local_files_only=True,
|
|
768
|
-
# the models might've been found if local_files_only=False
|
|
769
|
-
# Notify the user about that
|
|
770
|
-
if local_files_only:
|
|
771
|
-
raise LocalEntryNotFoundError(
|
|
772
|
-
"Cannot find the requested files in the cached path and"
|
|
773
|
-
" outgoing traffic has been disabled. To enable model look-ups"
|
|
774
|
-
" and downloads online, set 'local_files_only' to False."
|
|
775
|
-
)
|
|
776
|
-
else:
|
|
777
|
-
raise LocalEntryNotFoundError(
|
|
778
|
-
"Connection error, and we cannot find the requested files in"
|
|
779
|
-
" the cached path. Please try again or make sure your Internet"
|
|
780
|
-
" connection is on."
|
|
781
|
-
)
|
|
782
|
-
|
|
783
|
-
# From now on, etag is not None.
|
|
784
|
-
if os.path.exists(cache_path) and not force_download:
|
|
785
|
-
return cache_path
|
|
786
|
-
|
|
787
|
-
# Prevent parallel downloads of the same file with a lock.
|
|
788
|
-
lock_path = cache_path + ".lock"
|
|
789
|
-
|
|
790
|
-
# Some Windows versions do not allow for paths longer than 255 characters.
|
|
791
|
-
# In this case, we must specify it is an extended path by using the "\\?\" prefix.
|
|
792
|
-
if os.name == "nt" and len(os.path.abspath(lock_path)) > 255:
|
|
793
|
-
lock_path = "\\\\?\\" + os.path.abspath(lock_path)
|
|
794
|
-
|
|
795
|
-
if os.name == "nt" and len(os.path.abspath(cache_path)) > 255:
|
|
796
|
-
cache_path = "\\\\?\\" + os.path.abspath(cache_path)
|
|
797
|
-
|
|
798
|
-
with WeakFileLock(lock_path):
|
|
799
|
-
_download_to_tmp_and_move(
|
|
800
|
-
incomplete_path=Path(cache_path + ".incomplete"),
|
|
801
|
-
destination_path=Path(cache_path),
|
|
802
|
-
url_to_download=url_to_download,
|
|
803
|
-
proxies=proxies,
|
|
804
|
-
headers=headers,
|
|
805
|
-
expected_size=expected_size,
|
|
806
|
-
filename=filename,
|
|
807
|
-
force_download=force_download,
|
|
808
|
-
)
|
|
809
|
-
|
|
810
|
-
if force_filename is None:
|
|
811
|
-
logger.info("creating metadata file for %s", cache_path)
|
|
812
|
-
meta = {"url": url, "etag": etag}
|
|
813
|
-
meta_path = cache_path + ".json"
|
|
814
|
-
with open(meta_path, "w") as meta_file:
|
|
815
|
-
json.dump(meta, meta_file)
|
|
816
|
-
|
|
817
|
-
return cache_path
|
|
818
|
-
|
|
819
|
-
|
|
820
490
|
def _normalize_etag(etag: Optional[str]) -> Optional[str]:
|
|
821
491
|
"""Normalize ETag HTTP header, so it can be used to create nice filepaths.
|
|
822
492
|
|
|
@@ -990,14 +660,6 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
|
|
|
990
660
|
pass
|
|
991
661
|
|
|
992
662
|
|
|
993
|
-
@_deprecate_arguments(
|
|
994
|
-
version="0.26.0",
|
|
995
|
-
deprecated_args=["legacy_cache_layout"],
|
|
996
|
-
custom_message=(
|
|
997
|
-
"Legacy cache layout has been deprecated since August 2022 and will soon be removed. "
|
|
998
|
-
"See https://huggingface.co/docs/huggingface_hub/guides/manage-cache for more details."
|
|
999
|
-
),
|
|
1000
|
-
)
|
|
1001
663
|
@validate_hf_hub_args
|
|
1002
664
|
def hf_hub_download(
|
|
1003
665
|
repo_id: str,
|
|
@@ -1018,8 +680,6 @@ def hf_hub_download(
|
|
|
1018
680
|
local_files_only: bool = False,
|
|
1019
681
|
headers: Optional[Dict[str, str]] = None,
|
|
1020
682
|
endpoint: Optional[str] = None,
|
|
1021
|
-
# Deprecated args
|
|
1022
|
-
legacy_cache_layout: bool = False,
|
|
1023
683
|
resume_download: Optional[bool] = None,
|
|
1024
684
|
force_filename: Optional[str] = None,
|
|
1025
685
|
local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
|
|
@@ -1101,10 +761,6 @@ def hf_hub_download(
|
|
|
1101
761
|
local cached file if it exists.
|
|
1102
762
|
headers (`dict`, *optional*):
|
|
1103
763
|
Additional headers to be sent with the request.
|
|
1104
|
-
legacy_cache_layout (`bool`, *optional*, defaults to `False`):
|
|
1105
|
-
If `True`, uses the legacy file cache layout i.e. just call [`hf_hub_url`]
|
|
1106
|
-
then `cached_download`. This is deprecated as the new cache layout is
|
|
1107
|
-
more powerful.
|
|
1108
764
|
|
|
1109
765
|
Returns:
|
|
1110
766
|
`str`: Local path of file or if networking is off, last version of file cached on disk.
|
|
@@ -1137,7 +793,6 @@ def hf_hub_download(
|
|
|
1137
793
|
"which keeps the filenames as they are on the Hub, is now in place.",
|
|
1138
794
|
FutureWarning,
|
|
1139
795
|
)
|
|
1140
|
-
legacy_cache_layout = True
|
|
1141
796
|
if resume_download is not None:
|
|
1142
797
|
warnings.warn(
|
|
1143
798
|
"`resume_download` is deprecated and will be removed in version 1.0.0. "
|
|
@@ -1146,31 +801,6 @@ def hf_hub_download(
|
|
|
1146
801
|
FutureWarning,
|
|
1147
802
|
)
|
|
1148
803
|
|
|
1149
|
-
if legacy_cache_layout:
|
|
1150
|
-
url = hf_hub_url(
|
|
1151
|
-
repo_id,
|
|
1152
|
-
filename,
|
|
1153
|
-
subfolder=subfolder,
|
|
1154
|
-
repo_type=repo_type,
|
|
1155
|
-
revision=revision,
|
|
1156
|
-
endpoint=endpoint,
|
|
1157
|
-
)
|
|
1158
|
-
|
|
1159
|
-
return cached_download(
|
|
1160
|
-
url,
|
|
1161
|
-
library_name=library_name,
|
|
1162
|
-
library_version=library_version,
|
|
1163
|
-
cache_dir=cache_dir,
|
|
1164
|
-
user_agent=user_agent,
|
|
1165
|
-
force_download=force_download,
|
|
1166
|
-
force_filename=force_filename,
|
|
1167
|
-
proxies=proxies,
|
|
1168
|
-
etag_timeout=etag_timeout,
|
|
1169
|
-
token=token,
|
|
1170
|
-
local_files_only=local_files_only,
|
|
1171
|
-
legacy_cache_layout=legacy_cache_layout,
|
|
1172
|
-
)
|
|
1173
|
-
|
|
1174
804
|
if cache_dir is None:
|
|
1175
805
|
cache_dir = constants.HF_HUB_CACHE
|
|
1176
806
|
if revision is None:
|
|
@@ -1752,8 +1382,8 @@ def _get_metadata_or_catch_error(
|
|
|
1752
1382
|
commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
|
|
1753
1383
|
if commit_hash is not None:
|
|
1754
1384
|
no_exist_file_path = Path(storage_folder) / ".no_exist" / commit_hash / relative_filename
|
|
1755
|
-
no_exist_file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1756
1385
|
try:
|
|
1386
|
+
no_exist_file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1757
1387
|
no_exist_file_path.touch()
|
|
1758
1388
|
except OSError as e:
|
|
1759
1389
|
logger.error(
|