huggingface-hub 0.21.4__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub has been flagged as potentially problematic.
- huggingface_hub/__init__.py +217 -1
- huggingface_hub/_commit_api.py +14 -15
- huggingface_hub/_inference_endpoints.py +12 -11
- huggingface_hub/_login.py +1 -0
- huggingface_hub/_multi_commits.py +1 -0
- huggingface_hub/_snapshot_download.py +9 -1
- huggingface_hub/_tensorboard_logger.py +1 -0
- huggingface_hub/_webhooks_payload.py +1 -0
- huggingface_hub/_webhooks_server.py +1 -0
- huggingface_hub/commands/_cli_utils.py +1 -0
- huggingface_hub/commands/delete_cache.py +1 -0
- huggingface_hub/commands/download.py +1 -0
- huggingface_hub/commands/env.py +1 -0
- huggingface_hub/commands/scan_cache.py +1 -0
- huggingface_hub/commands/upload.py +1 -0
- huggingface_hub/community.py +1 -0
- huggingface_hub/constants.py +3 -1
- huggingface_hub/errors.py +38 -0
- huggingface_hub/file_download.py +102 -95
- huggingface_hub/hf_api.py +47 -35
- huggingface_hub/hf_file_system.py +77 -3
- huggingface_hub/hub_mixin.py +215 -54
- huggingface_hub/inference/_client.py +554 -239
- huggingface_hub/inference/_common.py +195 -41
- huggingface_hub/inference/_generated/_async_client.py +558 -239
- huggingface_hub/inference/_generated/types/__init__.py +115 -0
- huggingface_hub/inference/_generated/types/audio_classification.py +43 -0
- huggingface_hub/inference/_generated/types/audio_to_audio.py +31 -0
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +116 -0
- huggingface_hub/inference/_generated/types/base.py +149 -0
- huggingface_hub/inference/_generated/types/chat_completion.py +106 -0
- huggingface_hub/inference/_generated/types/depth_estimation.py +29 -0
- huggingface_hub/inference/_generated/types/document_question_answering.py +85 -0
- huggingface_hub/inference/_generated/types/feature_extraction.py +19 -0
- huggingface_hub/inference/_generated/types/fill_mask.py +50 -0
- huggingface_hub/inference/_generated/types/image_classification.py +43 -0
- huggingface_hub/inference/_generated/types/image_segmentation.py +52 -0
- huggingface_hub/inference/_generated/types/image_to_image.py +55 -0
- huggingface_hub/inference/_generated/types/image_to_text.py +105 -0
- huggingface_hub/inference/_generated/types/object_detection.py +55 -0
- huggingface_hub/inference/_generated/types/question_answering.py +77 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +28 -0
- huggingface_hub/inference/_generated/types/summarization.py +46 -0
- huggingface_hub/inference/_generated/types/table_question_answering.py +45 -0
- huggingface_hub/inference/_generated/types/text2text_generation.py +45 -0
- huggingface_hub/inference/_generated/types/text_classification.py +43 -0
- huggingface_hub/inference/_generated/types/text_generation.py +161 -0
- huggingface_hub/inference/_generated/types/text_to_audio.py +105 -0
- huggingface_hub/inference/_generated/types/text_to_image.py +57 -0
- huggingface_hub/inference/_generated/types/token_classification.py +53 -0
- huggingface_hub/inference/_generated/types/translation.py +46 -0
- huggingface_hub/inference/_generated/types/video_classification.py +47 -0
- huggingface_hub/inference/_generated/types/visual_question_answering.py +53 -0
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +56 -0
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +51 -0
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +55 -0
- huggingface_hub/inference/_templating.py +105 -0
- huggingface_hub/inference/_types.py +4 -152
- huggingface_hub/keras_mixin.py +39 -17
- huggingface_hub/lfs.py +20 -8
- huggingface_hub/repocard.py +11 -3
- huggingface_hub/repocard_data.py +12 -2
- huggingface_hub/serialization/__init__.py +1 -0
- huggingface_hub/serialization/_base.py +1 -0
- huggingface_hub/serialization/_numpy.py +1 -0
- huggingface_hub/serialization/_tensorflow.py +1 -0
- huggingface_hub/serialization/_torch.py +1 -0
- huggingface_hub/utils/__init__.py +4 -1
- huggingface_hub/utils/_cache_manager.py +7 -0
- huggingface_hub/utils/_chunk_utils.py +1 -0
- huggingface_hub/utils/_datetime.py +1 -0
- huggingface_hub/utils/_errors.py +10 -1
- huggingface_hub/utils/_experimental.py +1 -0
- huggingface_hub/utils/_fixes.py +19 -3
- huggingface_hub/utils/_git_credential.py +1 -0
- huggingface_hub/utils/_headers.py +10 -3
- huggingface_hub/utils/_hf_folder.py +1 -0
- huggingface_hub/utils/_http.py +1 -0
- huggingface_hub/utils/_pagination.py +1 -0
- huggingface_hub/utils/_paths.py +1 -0
- huggingface_hub/utils/_runtime.py +22 -0
- huggingface_hub/utils/_subprocess.py +1 -0
- huggingface_hub/utils/_token.py +1 -0
- huggingface_hub/utils/_typing.py +29 -1
- huggingface_hub/utils/_validators.py +1 -0
- huggingface_hub/utils/endpoint_helpers.py +1 -0
- huggingface_hub/utils/logging.py +1 -1
- huggingface_hub/utils/sha.py +1 -0
- huggingface_hub/utils/tqdm.py +1 -0
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0.dist-info}/METADATA +14 -15
- huggingface_hub-0.22.0.dist-info/RECORD +113 -0
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0.dist-info}/WHEEL +1 -1
- huggingface_hub/inference/_text_generation.py +0 -551
- huggingface_hub-0.21.4.dist-info/RECORD +0 -81
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0.dist-info}/top_level.txt +0 -0
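The file list already sketches the shape of the release: a new generated-types package under huggingface_hub/inference/_generated/types/ (one module per inference task, including chat_completion.py), a new _templating.py helper, and the removal of the legacy inference/_text_generation.py module. Judging from those new types, 0.22.0 exposes a chat-completion API on InferenceClient; a minimal sketch, with the model ID and parameter values as placeholders rather than anything taken from this diff:

# Minimal sketch of the chat-completion API implied by the new generated types.
# The model ID and parameter values below are placeholders, not from this diff.
from huggingface_hub import InferenceClient

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
output = client.chat_completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    max_tokens=100,
)
print(output.choices[0].message.content)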
huggingface_hub/file_download.py
CHANGED
@@ -1,4 +1,5 @@
 import copy
+import errno
 import fnmatch
 import inspect
 import io
@@ -19,7 +20,6 @@ from typing import Any, BinaryIO, Dict, Generator, Literal, Optional, Tuple, Union
 from urllib.parse import quote, urlparse
 
 import requests
-from filelock import FileLock
 
 from huggingface_hub import constants
 
@@ -54,6 +54,7 @@ from .utils import (
     RepositoryNotFoundError,
     RevisionNotFoundError,
     SoftTemporaryDirectory,
+    WeakFileLock,
     build_hf_headers,
     get_fastai_version,  # noqa: F401 # for backward compatibility
     get_fastcore_version,  # noqa: F401 # for backward compatibility
@@ -76,8 +77,6 @@ from .utils import (
     tqdm,
     validate_hf_hub_args,
 )
-from .utils._deprecation import _deprecate_method
-from .utils._headers import _http_user_agent
 from .utils._runtime import _PY_VERSION  # noqa: F401 # for backward compatibility
 from .utils._typing import HTTP_METHOD_T
 from .utils.insecure_hashlib import sha256
@@ -347,21 +346,6 @@ def filename_to_url(
     return url, etag
 
 
-@_deprecate_method(version="0.22.0", message="Use `huggingface_hub.utils.build_hf_headers` instead.")
-def http_user_agent(
-    *,
-    library_name: Optional[str] = None,
-    library_version: Optional[str] = None,
-    user_agent: Union[Dict, str, None] = None,
-) -> str:
-    """Deprecated in favor of [`build_hf_headers`]."""
-    return _http_user_agent(
-        library_name=library_name,
-        library_version=library_version,
-        user_agent=user_agent,
-    )
-
-
 def _request_wrapper(
     method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
 ) -> requests.Response:
@@ -420,6 +404,7 @@ def http_get(
     expected_size: Optional[int] = None,
     displayed_filename: Optional[str] = None,
     _nb_retries: int = 5,
+    _tqdm_bar: Optional[tqdm] = None,
 ) -> None:
     """
     Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub.
@@ -499,84 +484,90 @@ def http_get(
     )
 
     # Stream file to buffer
-    with tqdm(
-        unit="B",
-        unit_scale=True,
-        total=total,
-        initial=resume_size,
-        desc=displayed_filename,
-        disable=True if (logger.getEffectiveLevel() == logging.NOTSET) else None,
-        # ^ set `disable=None` rather than `disable=False` by default to disable progress bar when no TTY attached
-        # see https://github.com/huggingface/huggingface_hub/pull/2000
-    ) as progress:
-        if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
-            supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
-            if not supports_callback:
-                warnings.warn(
-                    "You are using an outdated version of `hf_transfer`. "
-                    "Consider upgrading to latest version to enable progress bars "
-                    "using `pip install -U hf_transfer`."
-                )
-            try:
-                hf_transfer.download(
-                    url=url,
-                    filename=temp_file.name,
-                    max_files=HF_TRANSFER_CONCURRENCY,
-                    chunk_size=DOWNLOAD_CHUNK_SIZE,
-                    headers=headers,
-                    parallel_failures=3,
-                    max_retries=5,
-                    **({"callback": progress.update} if supports_callback else {}),
-                )
-            except Exception as e:
-                raise RuntimeError(
-                    "An error occurred while downloading using `hf_transfer`. Consider"
-                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
-                ) from e
-            if not supports_callback:
-                progress.update(total)
-            if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
-                raise EnvironmentError(
-                    consistency_error_message.format(
-                        actual_size=os.path.getsize(temp_file.name),
-                    )
-                )
-            return
-        new_resume_size = resume_size
+    progress = _tqdm_bar
+    if progress is None:
+        progress = tqdm(
+            unit="B",
+            unit_scale=True,
+            total=total,
+            initial=resume_size,
+            desc=displayed_filename,
+            disable=True if (logger.getEffectiveLevel() == logging.NOTSET) else None,
+            # ^ set `disable=None` rather than `disable=False` by default to disable progress bar when no TTY attached
+            # see https://github.com/huggingface/huggingface_hub/pull/2000
+        )
+
+    if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
+        supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
+        if not supports_callback:
+            warnings.warn(
                "You are using an outdated version of `hf_transfer`. "
                "Consider upgrading to latest version to enable progress bars "
                "using `pip install -U hf_transfer`."
            )
         try:
-            for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
-                if chunk:  # filter out keep-alive new chunks
-                    progress.update(len(chunk))
-                    temp_file.write(chunk)
-                    new_resume_size += len(chunk)
-                    # Some data has been downloaded from the server so we reset the number of retries.
-                    _nb_retries = 5
-        except (requests.ConnectionError, requests.ReadTimeout) as e:
-            # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
-            # a transient error (network outage?). We log a warning message and try to resume the download a few times
-            # before giving up. Tre retry mechanism is basic but should be enough in most cases.
-            if _nb_retries <= 0:
-                logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
-                raise
-            logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
-            time.sleep(1)
-            reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
-            return http_get(
+            hf_transfer.download(
                 url=url,
-                temp_file=temp_file,
-                proxies=proxies,
-                resume_size=new_resume_size,
-                headers=initial_headers,
-                expected_size=expected_size,
-                _nb_retries=_nb_retries - 1,
+                filename=temp_file.name,
+                max_files=HF_TRANSFER_CONCURRENCY,
+                chunk_size=DOWNLOAD_CHUNK_SIZE,
+                headers=headers,
+                parallel_failures=3,
+                max_retries=5,
+                **({"callback": progress.update} if supports_callback else {}),
             )
-
-        if expected_size is not None and expected_size != temp_file.tell():
+        except Exception as e:
+            raise RuntimeError(
+                "An error occurred while downloading using `hf_transfer`. Consider"
+                " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
+            ) from e
+        if not supports_callback:
+            progress.update(total)
+        if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
             raise EnvironmentError(
                 consistency_error_message.format(
-                    actual_size=temp_file.tell(),
+                    actual_size=os.path.getsize(temp_file.name),
                 )
             )
+        return
+    new_resume_size = resume_size
+    try:
+        for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
+            if chunk:  # filter out keep-alive new chunks
+                progress.update(len(chunk))
+                temp_file.write(chunk)
+                new_resume_size += len(chunk)
+                # Some data has been downloaded from the server so we reset the number of retries.
+                _nb_retries = 5
+    except (requests.ConnectionError, requests.ReadTimeout) as e:
+        # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
+        # a transient error (network outage?). We log a warning message and try to resume the download a few times
+        # before giving up. Tre retry mechanism is basic but should be enough in most cases.
+        if _nb_retries <= 0:
+            logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+            raise
+        logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+        time.sleep(1)
+        reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
+        return http_get(
+            url=url,
+            temp_file=temp_file,
+            proxies=proxies,
+            resume_size=new_resume_size,
+            headers=initial_headers,
+            expected_size=expected_size,
+            _nb_retries=_nb_retries - 1,
+            _tqdm_bar=_tqdm_bar,
+        )
+
+    progress.close()
+
+    if expected_size is not None and expected_size != temp_file.tell():
+        raise EnvironmentError(
+            consistency_error_message.format(
+                actual_size=temp_file.tell(),
            )
        )
 
 
 @validate_hf_hub_args
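The hunk above also threads the new private `_tqdm_bar` argument through the recursive retry call (`_tqdm_bar=_tqdm_bar`), so a download resumed after a transient network error keeps updating the bar it started with instead of opening a fresh one. A rough sketch of the pattern, assuming a caller that pre-builds the bar; `http_get` and `_tqdm_bar` are private APIs and the URL is a placeholder:

# Rough sketch only: `http_get` and `_tqdm_bar` are private APIs in 0.22.0,
# and the URL below is a placeholder.
import tempfile

from huggingface_hub.file_download import http_get
from huggingface_hub.utils import tqdm

bar = tqdm(unit="B", unit_scale=True, desc="my-file.bin")  # caller-owned bar
with tempfile.NamedTemporaryFile() as tmp:
    # Retries inside http_get resume into this same bar via _tqdm_bar.
    http_get("https://example.com/my-file.bin", tmp, _tqdm_bar=bar)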
@@ -794,7 +785,7 @@ def cached_download(
     if os.name == "nt" and len(os.path.abspath(cache_path)) > 255:
         cache_path = "\\\\?\\" + os.path.abspath(cache_path)
 
-    with FileLock(lock_path):
+    with WeakFileLock(lock_path):
         # If the download just completed while the lock was activated.
         if os.path.exists(cache_path) and not force_download:
             # Even if returning early like here, the lock will be released.
@@ -878,7 +869,7 @@ def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
 
     By default, it will try to create a symlink using a relative path. Relative paths have 2 advantages:
     - If the cache_folder is moved (example: back-up on a shared drive), relative paths within the cache folder will
-      not brake.
+      not break.
     - Relative paths seems to be better handled on Windows. Issue was reported 3 times in less than a week when
       changing from relative to absolute paths. See https://github.com/huggingface/huggingface_hub/issues/1398,
       https://github.com/huggingface/diffusers/issues/2729 and https://github.com/huggingface/transformers/pull/22228.
@@ -899,7 +890,7 @@ def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
     cache, the file is duplicated on the disk.
 
     In case symlinks are not supported, a warning message is displayed to the user once when loading `huggingface_hub`.
-    The warning message can be disable with the `DISABLE_SYMLINKS_WARNING` environment variable.
+    The warning message can be disabled with the `DISABLE_SYMLINKS_WARNING` environment variable.
     """
     try:
         os.remove(dst)
@@ -930,6 +921,12 @@ def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
         # Permission error means src and dst are not in the same volume (e.g. destination path has been provided
        # by the user via `local_dir`. Let's test symlink support there)
         _support_symlinks = are_symlinks_supported(abs_dst_folder)
+    except OSError as e:
+        # OS error (errno=30) means that the commonpath is readonly on Linux/MacOS.
+        if e.errno == errno.EROFS:
+            _support_symlinks = are_symlinks_supported(abs_dst_folder)
+        else:
+            raise
 
     # Symlinks are supported => let's create a symlink.
     if _support_symlinks:
@@ -1034,6 +1031,7 @@ def hf_hub_download(
     resume_download: bool = False,
     token: Union[bool, str, None] = None,
     local_files_only: bool = False,
+    headers: Optional[Dict[str, str]] = None,
     legacy_cache_layout: bool = False,
     endpoint: Optional[str] = None,
 ) -> str:
@@ -1130,6 +1128,8 @@ def hf_hub_download(
         local_files_only (`bool`, *optional*, defaults to `False`):
             If `True`, avoid downloading the file and return the path to the
             local cached file if it exists.
+        headers (`dict`, *optional*):
+            Additional headers to be sent with the request.
         legacy_cache_layout (`bool`, *optional*, defaults to `False`):
             If `True`, uses the legacy file cache layout i.e. just call [`hf_hub_url`]
             then `cached_download`. This is deprecated as the new cache layout is
@@ -1221,7 +1221,6 @@ def hf_hub_download(
         raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
 
     storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
-    os.makedirs(storage_folder, exist_ok=True)
 
     # cross platform transcription of filename, to be used as a local file path.
     relative_filename = os.path.join(*filename.split("/"))
@@ -1248,6 +1247,7 @@ def hf_hub_download(
         library_name=library_name,
         library_version=library_version,
         user_agent=user_agent,
+        headers=headers,
     )
 
     url_to_download = url
@@ -1448,7 +1448,7 @@ def hf_hub_download(
         blob_path = "\\\\?\\" + os.path.abspath(blob_path)
 
     Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
-    with FileLock(lock_path):
+    with WeakFileLock(lock_path):
         # If the download just completed while the lock was activated.
         if os.path.exists(pointer_path) and not force_download:
             # Even if returning early like here, the lock will be released.
@@ -1630,6 +1630,7 @@ def get_hf_file_metadata(
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
     user_agent: Union[Dict, str, None] = None,
+    headers: Optional[Dict[str, str]] = None,
 ) -> HfFileMetadata:
     """Fetch metadata of a file versioned on the Hub for a given url.
 
@@ -1653,13 +1654,19 @@ def get_hf_file_metadata(
             The version of the library.
         user_agent (`dict`, `str`, *optional*):
             The user-agent info in the form of a dictionary or a string.
+        headers (`dict`, *optional*):
+            Additional headers to be sent with the request.
 
     Returns:
         A [`HfFileMetadata`] object containing metadata such as location, etag, size and
         commit_hash.
     """
     headers = build_hf_headers(
-        token=token, library_name=library_name, library_version=library_version, user_agent=user_agent
+        token=token,
+        library_name=library_name,
+        library_version=library_version,
+        user_agent=user_agent,
+        headers=headers,
     )
     headers["Accept-Encoding"] = "identity"  # prevent any compression => we want to know the real size of the file
 
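The common thread in file_download.py is a user-supplied `headers` dict that `hf_hub_download` and `get_hf_file_metadata` now accept and forward to `build_hf_headers`, plus the switch from `FileLock` to the new `WeakFileLock` for cache locking. A minimal sketch of the new parameter; the header name and value are placeholders:

# Minimal sketch of the new `headers` parameter on hf_hub_download (0.22.0).
# The header name/value are placeholders.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="gpt2",
    filename="config.json",
    headers={"X-My-Header": "value"},  # merged into the default HF headers
)
print(path)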
huggingface_hub/hf_api.py
CHANGED
@@ -82,6 +82,8 @@ from .community import (
     deserialize_event,
 )
 from .constants import (
+    _HF_DEFAULT_ENDPOINT,
+    _HF_DEFAULT_STAGING_ENDPOINT,
     DEFAULT_ETAG_TIMEOUT,
     DEFAULT_REQUEST_TIMEOUT,
     DEFAULT_REVISION,
@@ -182,6 +184,12 @@ def repo_type_and_id_from_hf_id(hf_id: str, hub_url: Optional[str] = None) -> Tuple[Optional[str], Optional[str], str]:
         If `repo_type` is unknown.
     """
     input_hf_id = hf_id
+
+    # check if a proxy has been set => if yes, update the returned URL to use the proxy
+    if ENDPOINT not in (_HF_DEFAULT_ENDPOINT, _HF_DEFAULT_STAGING_ENDPOINT):
+        hf_id = hf_id.replace(_HF_DEFAULT_ENDPOINT, ENDPOINT)
+        hf_id = hf_id.replace(_HF_DEFAULT_STAGING_ENDPOINT, ENDPOINT)
+
     hub_url = re.sub(r"https?://", "", hub_url if hub_url is not None else ENDPOINT)
     is_hf_url = hub_url in hf_id and "@" not in hf_id
 
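The second hunk makes `repo_type_and_id_from_hf_id` proxy-aware: when `ENDPOINT` (settable via the `HF_ENDPOINT` environment variable) is neither the default nor the staging endpoint, canonical hub URLs are rewritten to the proxy before parsing. A hedged sketch of the parsing itself; the expected output is an assumption from the function's return type, not something shown in this diff:

# Sketch of repo_type_and_id_from_hf_id; with HF_ENDPOINT pointing at a proxy,
# the canonical prefix is now rewritten to the proxy before parsing.
from huggingface_hub.hf_api import repo_type_and_id_from_hf_id

repo_type, namespace, repo_id = repo_type_and_id_from_hf_id(
    "https://huggingface.co/datasets/squad"
)
print(repo_type, namespace, repo_id)  # expected: dataset None squad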
@@ -1234,10 +1242,11 @@ class HfApi:
     def __init__(
         self,
         endpoint: Optional[str] = None,
-        token: Optional[str] = None,
+        token: Union[str, bool, None] = None,
         library_name: Optional[str] = None,
         library_version: Optional[str] = None,
         user_agent: Union[Dict, str, None] = None,
+        headers: Optional[Dict[str, str]] = None,
     ) -> None:
         """Create a HF client to interact with the Hub via HTTP.
 
@@ -1247,9 +1256,9 @@ class HfApi:
             directly at the root of `huggingface_hub`.
 
         Args:
-            token (`str`, *optional*):
-                Hugging Face token. Will default to the locally saved token if
-                not provided.
+            token (`str` or `bool`, *optional*):
+                Hugging Face token. Will default to the locally saved token if not provided.
+                Pass `token=False` if you don't want to send your token to the server.
             library_name (`str`, *optional*):
                 The name of the library that is making the HTTP request. Will be added to
                 the user-agent header. Example: `"transformers"`.
@@ -1259,12 +1268,16 @@ class HfApi:
             user_agent (`str`, `dict`, *optional*):
                 The user agent info in the form of a dictionary or a single string. It will
                 be completed with information about the installed packages.
+            headers (`dict`, *optional*):
+                Additional headers to be sent with each request. Example: `{"X-My-Header": "value"}`.
+                Headers passed here are taking precedence over the default headers.
         """
         self.endpoint = endpoint if endpoint is not None else ENDPOINT
         self.token = token
         self.library_name = library_name
         self.library_version = library_version
         self.user_agent = user_agent
+        self.headers = headers
         self._thread_pool: Optional[ThreadPoolExecutor] = None
 
     def run_as_future(self, fn: Callable[..., R], *args, **kwargs) -> Future[R]:
@@ -2514,7 +2527,7 @@ class HfApi:
         *,
         repo_type: Optional[str] = None,
         revision: Optional[str] = None,
-        token: Optional[str] = None,
+        token: Union[str, bool, None] = None,
     ) -> bool:
         """
         Checks if a file exists in a repository on the Hugging Face Hub.
@@ -3227,7 +3240,7 @@ class HfApi:
 
         # Prepare request
         url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/super-squash/{branch}"
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
         commit_message = commit_message or f"Super-squash branch '{branch}' using huggingface_hub"
 
         # Super-squash
@@ -3333,7 +3346,7 @@ class HfApi:
             # Testing purposes only.
             # See https://github.com/huggingface/huggingface_hub/pull/733/files#r820604472
             json["lfsmultipartthresh"] = self._lfsmultipartthresh  # type: ignore
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
 
         while True:
             r = get_session().post(path, headers=headers, json=json)
@@ -3408,7 +3421,7 @@ class HfApi:
         if repo_type is not None:
             json["type"] = repo_type
 
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
         r = get_session().delete(path, headers=headers, json=json)
         try:
             hf_raise_for_status(r)
@@ -3473,7 +3486,7 @@ class HfApi:
 
         r = get_session().put(
             url=f"{self.endpoint}/api/{repo_type}s/{namespace}/{name}/settings",
-            headers=self._build_hf_headers(token=token, is_write_action=True),
+            headers=self._build_hf_headers(token=token),
             json={"private": private},
         )
         hf_raise_for_status(r)
@@ -3530,7 +3543,7 @@ class HfApi:
         json = {"fromRepo": from_id, "toRepo": to_id, "type": repo_type}
 
         path = f"{self.endpoint}/api/repos/move"
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
         r = get_session().post(path, headers=headers, json=json)
         try:
             hf_raise_for_status(r)
@@ -3556,8 +3569,7 @@ class HfApi:
         num_threads: int = 5,
         parent_commit: Optional[str] = None,
         run_as_future: Literal[False] = ...,
-    ) -> CommitInfo:
-        ...
+    ) -> CommitInfo: ...
 
     @overload
     def create_commit(
@@ -3574,8 +3586,7 @@ class HfApi:
         num_threads: int = 5,
         parent_commit: Optional[str] = None,
         run_as_future: Literal[True] = ...,
-    ) -> Future[CommitInfo]:
-        ...
+    ) -> Future[CommitInfo]: ...
 
     @validate_hf_hub_args
     @future_compatible
@@ -3711,6 +3722,8 @@ class HfApi:
         revision = quote(unquoted_revision, safe="")
         create_pr = create_pr if create_pr is not None else False
 
+        headers = self._build_hf_headers(token=token)
+
         operations = list(operations)
         additions = [op for op in operations if isinstance(op, CommitOperationAdd)]
         copies = [op for op in operations if isinstance(op, CommitOperationCopy)]
@@ -3738,7 +3751,7 @@ class HfApi:
                 response = get_session().post(
                     f"{ENDPOINT}/api/validate-yaml",
                     json={"content": file.read().decode(), "repoType": repo_type},
-                    headers=self._build_hf_headers(token=token),
+                    headers=headers,
                 )
                 # Handle warnings (example: empty metadata)
                 response_content = response.json()
@@ -3773,7 +3786,7 @@ class HfApi:
             copies=copies,
             repo_type=repo_type,
             repo_id=repo_id,
-            token=token,
+            headers=headers,
             revision=revision,
             endpoint=self.endpoint,
         )
@@ -3794,7 +3807,7 @@ class HfApi:
         headers = {
             # See https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
             "Content-Type": "application/x-ndjson",
-            **self._build_hf_headers(token=token, is_write_action=True),
+            **headers,
         }
         data = b"".join(_payload_as_ndjson())
         params = {"create_pr": "1"} if create_pr else None
@@ -4213,6 +4226,7 @@ class HfApi:
             raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}")
         revision = quote(revision, safe="") if revision is not None else DEFAULT_REVISION
         create_pr = create_pr if create_pr is not None else False
+        headers = self._build_hf_headers(token=token)
 
         # Check if a `gitignore` file is being committed to the Hub.
         additions = list(additions)
@@ -4232,7 +4246,7 @@ class HfApi:
             additions=new_additions,
             repo_type=repo_type,
             repo_id=repo_id,
-            token=token,
+            headers=headers,
             revision=revision,
             endpoint=self.endpoint,
             create_pr=create_pr or False,
@@ -4263,7 +4277,7 @@ class HfApi:
             additions=new_lfs_additions_to_upload,
             repo_type=repo_type,
             repo_id=repo_id,
-            token=token,
+            headers=headers,
             endpoint=self.endpoint,
             num_threads=num_threads,
             # If `create_pr`, we don't want to check user permission on the revision as users with read permission
@@ -4291,8 +4305,7 @@ class HfApi:
         create_pr: Optional[bool] = None,
         parent_commit: Optional[str] = None,
         run_as_future: Literal[False] = ...,
-    ) -> CommitInfo:
-        ...
+    ) -> CommitInfo: ...
 
     @overload
     def upload_file(
@@ -4309,8 +4322,7 @@ class HfApi:
         create_pr: Optional[bool] = None,
         parent_commit: Optional[str] = None,
         run_as_future: Literal[True] = ...,
-    ) -> Future[CommitInfo]:
-        ...
+    ) -> Future[CommitInfo]: ...
 
     @validate_hf_hub_args
     @future_compatible
@@ -4498,8 +4510,7 @@ class HfApi:
         multi_commits: Literal[False] = ...,
         multi_commits_verbose: bool = False,
         run_as_future: Literal[False] = ...,
-    ) -> CommitInfo:
-        ...
+    ) -> CommitInfo: ...
 
     @overload
     def upload_folder(  # type: ignore
@@ -4544,8 +4555,7 @@ class HfApi:
         multi_commits: Literal[False] = ...,
         multi_commits_verbose: bool = False,
         run_as_future: Literal[True] = ...,
-    ) -> Future[CommitInfo]:
-        ...
+    ) -> Future[CommitInfo]: ...
 
     @overload
     def upload_folder(
@@ -5188,6 +5198,7 @@ class HfApi:
             etag_timeout=etag_timeout,
             resume_download=resume_download,
             token=token,
+            headers=self.headers,
             local_files_only=local_files_only,
             legacy_cache_layout=legacy_cache_layout,
         )
@@ -5609,7 +5620,7 @@ class HfApi:
 
         # Prepare request
        branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{branch}"
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
         payload = {}
         if revision is not None:
             payload["startingPoint"] = revision
@@ -5665,7 +5676,7 @@ class HfApi:
 
         # Prepare request
         branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{branch}"
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
 
         # Delete branch
         response = get_session().delete(url=branch_url, headers=headers)
@@ -5729,7 +5740,7 @@ class HfApi:
 
         # Prepare request
         tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{revision}"
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
         payload = {"tag": tag}
         if tag_message is not None:
             payload["message"] = tag_message
@@ -5782,7 +5793,7 @@ class HfApi:
 
         # Prepare request
         tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{tag}"
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
 
         # Un-tag
         response = get_session().delete(url=tag_url, headers=headers)
@@ -6077,7 +6088,7 @@ class HfApi:
             )
         )
 
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
         resp = get_session().post(
             f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions",
             json={
@@ -6175,7 +6186,7 @@ class HfApi:
 
         path = f"{self.endpoint}/api/{repo_id}/discussions/{discussion_num}/{resource}"
 
-        headers = self._build_hf_headers(token=token, is_write_action=True)
+        headers = self._build_hf_headers(token=token)
         resp = requests.post(path, headers=headers, json=body)
         hf_raise_for_status(resp)
         return resp
@@ -6993,7 +7004,7 @@ class HfApi:
 
         r = get_session().post(
            f"{self.endpoint}/api/spaces/{from_id}/duplicate",
-            headers=self._build_hf_headers(token=token, is_write_action=True),
+            headers=self._build_hf_headers(token=token),
             json=payload,
         )
 
@@ -8408,6 +8419,7 @@ class HfApi:
             library_name=library_name or self.library_name,
             library_version=library_version or self.library_version,
             user_agent=user_agent or self.user_agent,
+            headers=self.headers,
         )
 
     def _prepare_upload_folder_deletions(
@@ -8435,7 +8447,7 @@ class HfApi:
         filenames = self.list_repo_files(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token)
 
         # Compute relative path in repo
-        if path_in_repo:
+        if path_in_repo and path_in_repo not in (".", "./"):
             path_in_repo = path_in_repo.strip("/") + "/"  # harmonize
             relpath_to_abspath = {
                 file[len(path_in_repo) :]: file for file in filenames if file.startswith(path_in_repo)