huggingface-hub 0.22.0rc0__py3-none-any.whl → 0.22.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. See the details below for more information.

@@ -46,7 +46,7 @@ import sys
46
46
  from typing import TYPE_CHECKING
47
47
 
48
48
 
49
- __version__ = "0.22.0.rc0"
49
+ __version__ = "0.22.0.rc1"
50
50
 
51
51
  # Alphabetical order of definitions is ensured in tests
52
52
  # WARNING: any comment added in this dictionary definition will be lost when
@@ -404,6 +404,7 @@ def http_get(
404
404
  expected_size: Optional[int] = None,
405
405
  displayed_filename: Optional[str] = None,
406
406
  _nb_retries: int = 5,
407
+ _tqdm_bar: Optional[tqdm] = None,
407
408
  ) -> None:
408
409
  """
409
410
  Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub.
@@ -483,84 +484,90 @@ def http_get(
483
484
  )
484
485
 
485
486
  # Stream file to buffer
486
- with tqdm(
487
- unit="B",
488
- unit_scale=True,
489
- total=total,
490
- initial=resume_size,
491
- desc=displayed_filename,
492
- disable=True if (logger.getEffectiveLevel() == logging.NOTSET) else None,
493
- # ^ set `disable=None` rather than `disable=False` by default to disable progress bar when no TTY attached
494
- # see https://github.com/huggingface/huggingface_hub/pull/2000
495
- ) as progress:
496
- if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
497
- supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
498
- if not supports_callback:
499
- warnings.warn(
500
- "You are using an outdated version of `hf_transfer`. "
501
- "Consider upgrading to latest version to enable progress bars "
502
- "using `pip install -U hf_transfer`."
503
- )
504
- try:
505
- hf_transfer.download(
506
- url=url,
507
- filename=temp_file.name,
508
- max_files=HF_TRANSFER_CONCURRENCY,
509
- chunk_size=DOWNLOAD_CHUNK_SIZE,
510
- headers=headers,
511
- parallel_failures=3,
512
- max_retries=5,
513
- **({"callback": progress.update} if supports_callback else {}),
514
- )
515
- except Exception as e:
516
- raise RuntimeError(
517
- "An error occurred while downloading using `hf_transfer`. Consider"
518
- " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
519
- ) from e
520
- if not supports_callback:
521
- progress.update(total)
522
- if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
523
- raise EnvironmentError(
524
- consistency_error_message.format(
525
- actual_size=os.path.getsize(temp_file.name),
526
- )
527
- )
528
- return
529
- new_resume_size = resume_size
487
+ progress = _tqdm_bar
488
+ if progress is None:
489
+ progress = tqdm(
490
+ unit="B",
491
+ unit_scale=True,
492
+ total=total,
493
+ initial=resume_size,
494
+ desc=displayed_filename,
495
+ disable=True if (logger.getEffectiveLevel() == logging.NOTSET) else None,
496
+ # ^ set `disable=None` rather than `disable=False` by default to disable progress bar when no TTY attached
497
+ # see https://github.com/huggingface/huggingface_hub/pull/2000
498
+ )
499
+
500
+ if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
501
+ supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
502
+ if not supports_callback:
503
+ warnings.warn(
504
+ "You are using an outdated version of `hf_transfer`. "
505
+ "Consider upgrading to latest version to enable progress bars "
506
+ "using `pip install -U hf_transfer`."
507
+ )
530
508
  try:
531
- for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
532
- if chunk: # filter out keep-alive new chunks
533
- progress.update(len(chunk))
534
- temp_file.write(chunk)
535
- new_resume_size += len(chunk)
536
- # Some data has been downloaded from the server so we reset the number of retries.
537
- _nb_retries = 5
538
- except (requests.ConnectionError, requests.ReadTimeout) as e:
539
- # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
540
- # a transient error (network outage?). We log a warning message and try to resume the download a few times
541
- # before giving up. The retry mechanism is basic but should be enough in most cases.
542
- if _nb_retries <= 0:
543
- logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
544
- raise
545
- logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
546
- time.sleep(1)
547
- reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects
548
- return http_get(
509
+ hf_transfer.download(
549
510
  url=url,
550
- temp_file=temp_file,
551
- proxies=proxies,
552
- resume_size=new_resume_size,
553
- headers=initial_headers,
554
- expected_size=expected_size,
555
- _nb_retries=_nb_retries - 1,
511
+ filename=temp_file.name,
512
+ max_files=HF_TRANSFER_CONCURRENCY,
513
+ chunk_size=DOWNLOAD_CHUNK_SIZE,
514
+ headers=headers,
515
+ parallel_failures=3,
516
+ max_retries=5,
517
+ **({"callback": progress.update} if supports_callback else {}),
556
518
  )
557
-
558
- if expected_size is not None and expected_size != temp_file.tell():
519
+ except Exception as e:
520
+ raise RuntimeError(
521
+ "An error occurred while downloading using `hf_transfer`. Consider"
522
+ " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
523
+ ) from e
524
+ if not supports_callback:
525
+ progress.update(total)
526
+ if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
559
527
  raise EnvironmentError(
560
528
  consistency_error_message.format(
561
- actual_size=temp_file.tell(),
529
+ actual_size=os.path.getsize(temp_file.name),
562
530
  )
563
531
  )
532
+ return
533
+ new_resume_size = resume_size
534
+ try:
535
+ for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
536
+ if chunk: # filter out keep-alive new chunks
537
+ progress.update(len(chunk))
538
+ temp_file.write(chunk)
539
+ new_resume_size += len(chunk)
540
+ # Some data has been downloaded from the server so we reset the number of retries.
541
+ _nb_retries = 5
542
+ except (requests.ConnectionError, requests.ReadTimeout) as e:
543
+ # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
544
+ # a transient error (network outage?). We log a warning message and try to resume the download a few times
545
+ # before giving up. The retry mechanism is basic but should be enough in most cases.
546
+ if _nb_retries <= 0:
547
+ logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
548
+ raise
549
+ logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
550
+ time.sleep(1)
551
+ reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects
552
+ return http_get(
553
+ url=url,
554
+ temp_file=temp_file,
555
+ proxies=proxies,
556
+ resume_size=new_resume_size,
557
+ headers=initial_headers,
558
+ expected_size=expected_size,
559
+ _nb_retries=_nb_retries - 1,
560
+ _tqdm_bar=_tqdm_bar,
561
+ )
562
+
563
+ progress.close()
564
+
565
+ if expected_size is not None and expected_size != temp_file.tell():
566
+ raise EnvironmentError(
567
+ consistency_error_message.format(
568
+ actual_size=temp_file.tell(),
569
+ )
570
+ )
564
571
 
565
572
 
566
573
  @validate_hf_hub_args
@@ -6,15 +6,24 @@ from collections import deque
6
6
  from dataclasses import dataclass, field
7
7
  from datetime import datetime
8
8
  from itertools import chain
9
+ from pathlib import Path
9
10
  from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union
10
11
  from urllib.parse import quote, unquote
11
12
 
12
13
  import fsspec
14
+ from fsspec.callbacks import _DEFAULT_CALLBACK, NoOpCallback, TqdmCallback
15
+ from fsspec.utils import isfilelike
13
16
  from requests import Response
14
17
 
15
18
  from ._commit_api import CommitOperationCopy, CommitOperationDelete
16
- from .constants import DEFAULT_REVISION, ENDPOINT, REPO_TYPE_MODEL, REPO_TYPES_MAPPING, REPO_TYPES_URL_PREFIXES
17
- from .file_download import hf_hub_url
19
+ from .constants import (
20
+ DEFAULT_REVISION,
21
+ ENDPOINT,
22
+ REPO_TYPE_MODEL,
23
+ REPO_TYPES_MAPPING,
24
+ REPO_TYPES_URL_PREFIXES,
25
+ )
26
+ from .file_download import hf_hub_url, http_get
18
27
  from .hf_api import HfApi, LastCommitInfo, RepoFile
19
28
  from .utils import (
20
29
  EntryNotFoundError,
@@ -591,6 +600,58 @@ class HfFileSystem(fsspec.AbstractFileSystem):
591
600
  url = url.replace("/resolve/", "/tree/", 1)
592
601
  return url
593
602
 
603
+ def get_file(self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs) -> None:
604
+ """Copy single remote file to local."""
605
+ revision = kwargs.get("revision")
606
+ unhandled_kwargs = set(kwargs.keys()) - {"revision"}
607
+ if not isinstance(callback, (NoOpCallback, TqdmCallback)) or len(unhandled_kwargs) > 0:
608
+ # for now, let's not handle custom callbacks
609
+ # and let's not handle custom kwargs
610
+ return super().get_file(rpath, lpath, callback=callback, outfile=outfile, **kwargs)
611
+
612
+ # Taken from https://github.com/fsspec/filesystem_spec/blob/47b445ae4c284a82dd15e0287b1ffc410e8fc470/fsspec/spec.py#L883
613
+ if isfilelike(lpath):
614
+ outfile = lpath
615
+ elif self.isdir(rpath):
616
+ os.makedirs(lpath, exist_ok=True)
617
+ return None
618
+
619
+ if isinstance(lpath, (str, Path)): # otherwise, let's assume it's a file-like object
620
+ os.makedirs(os.path.dirname(lpath), exist_ok=True)
621
+
622
+ # Open file if not already open
623
+ close_file = False
624
+ if outfile is None:
625
+ outfile = open(lpath, "wb")
626
+ close_file = True
627
+ initial_pos = outfile.tell()
628
+
629
+ # Custom implementation of `get_file` to use `http_get`.
630
+ resolve_remote_path = self.resolve_path(rpath, revision=revision)
631
+ expected_size = self.info(rpath, revision=revision)["size"]
632
+ callback.set_size(expected_size)
633
+ try:
634
+ http_get(
635
+ url=hf_hub_url(
636
+ repo_id=resolve_remote_path.repo_id,
637
+ revision=resolve_remote_path.revision,
638
+ filename=resolve_remote_path.path_in_repo,
639
+ repo_type=resolve_remote_path.repo_type,
640
+ endpoint=self.endpoint,
641
+ ),
642
+ temp_file=outfile,
643
+ displayed_filename=rpath,
644
+ expected_size=expected_size,
645
+ resume_size=0,
646
+ headers=self._api._build_hf_headers(),
647
+ _tqdm_bar=callback.tqdm if isinstance(callback, TqdmCallback) else None,
648
+ )
649
+ outfile.seek(initial_pos)
650
+ finally:
651
+ # Close file only if we opened it ourselves
652
+ if close_file:
653
+ outfile.close()
654
+
594
655
  @property
595
656
  def transaction(self):
596
657
  """A context within which files are committed together upon exit
@@ -618,6 +679,7 @@ class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
618
679
  raise FileNotFoundError(
619
680
  f"{e}.\nMake sure the repository and revision exist before writing data."
620
681
  ) from e
682
+ raise
621
683
  super().__init__(fs, self.resolved_path.unresolve(), **kwargs)
622
684
  self.fs: HfFileSystem
623
685
 
@@ -667,6 +729,18 @@ class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
667
729
  path=self.resolved_path.unresolve(),
668
730
  )
669
731
 
732
+ def read(self, length=-1):
733
+ """Read remote file.
734
+
735
+ If `length` is not provided or is -1, the entire file is downloaded and read. On POSIX systems and if
736
+ `hf_transfer` is not enabled, the file is loaded in memory directly. Otherwise, the file is downloaded to a
737
+ temporary file and read from there.
738
+ """
739
+ if self.mode == "rb" and (length is None or length == -1) and self.loc == 0:
740
+ with self.fs.open(self.path, "rb", block_size=0) as f: # block_size=0 enables fast streaming
741
+ return f.read()
742
+ return super().read(length)
743
+
670
744
  def url(self) -> str:
671
745
  return self.fs.url(self.path)
672
746
 
@@ -695,7 +769,7 @@ class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
695
769
  raise FileNotFoundError(
696
770
  f"{e}.\nMake sure the repository and revision exist before writing data."
697
771
  ) from e
698
- # avoid an unecessary .info() call to instantiate .details
772
+ # avoid an unnecessary .info() call to instantiate .details
699
773
  self.details = {"name": self.resolved_path.unresolve(), "size": None}
700
774
  super().__init__(
701
775
  fs, self.resolved_path.unresolve(), mode=mode, block_size=block_size, cache_type=cache_type, **kwargs
@@ -262,6 +262,12 @@ class ModelHubMixin:
262
262
  save_directory = Path(save_directory)
263
263
  save_directory.mkdir(parents=True, exist_ok=True)
264
264
 
265
+ # Remove config.json if already exists. After `_save_pretrained` we don't want to overwrite config.json
266
+ # as it might have been saved by the custom `_save_pretrained` already. However we do want to overwrite
267
+ # an existing config.json if it was not saved by `_save_pretrained`.
268
+ config_path = save_directory / CONFIG_NAME
269
+ config_path.unlink(missing_ok=True)
270
+
265
271
  # save model weights/files (framework-specific)
266
272
  self._save_pretrained(save_directory)
267
273
 
@@ -271,7 +277,6 @@ class ModelHubMixin:
271
277
  if config is not None:
272
278
  if is_dataclass(config):
273
279
  config = asdict(config) # type: ignore[arg-type]
274
- config_path = save_directory / CONFIG_NAME
275
280
  if not config_path.exists():
276
281
  config_str = json.dumps(config, sort_keys=True, indent=2)
277
282
  config_path.write_text(config_str)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: huggingface-hub
3
- Version: 0.22.0rc0
3
+ Version: 0.22.0rc1
4
4
  Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
5
5
  Home-page: https://github.com/huggingface/huggingface_hub
6
6
  Author: Hugging Face, Inc.
@@ -1,4 +1,4 @@
1
- huggingface_hub/__init__.py,sha256=ArwbPJXewqgiHAhTd-S950Pxr2gT4Xh2y0d51PHzr8g,31079
1
+ huggingface_hub/__init__.py,sha256=LID6MP7AuAoNupxUtCQblXubMjWrXLUx2bME13g1omg,31079
2
2
  huggingface_hub/_commit_api.py,sha256=ANamHMhHjvA6xCaLuyWcbwK62JMvEnGwIkpmHBlVoZo,29175
3
3
  huggingface_hub/_commit_scheduler.py,sha256=FgfjYv3E0oK3iBxDdy45Y7t78FWkmjnBR4dRd5aZviU,13653
4
4
  huggingface_hub/_inference_endpoints.py,sha256=wGcnxZNFCbMK77SA90fPsZ9bqNGwPopSVr-sTbdw3o8,15763
@@ -13,10 +13,10 @@ huggingface_hub/community.py,sha256=SBaOfI-3atCzRbO0gDS8BYxctbdvD4G0X6D0GfY8Fgc,
13
13
  huggingface_hub/constants.py,sha256=8r0JaNMhLR8X6pC6TnNBLQ-TVcHEbRWk1sJ-LSIj444,7821
14
14
  huggingface_hub/errors.py,sha256=jCYKeSOsQNfH2t3TsW8kIAXXS1aWl9PaAq3prFfz4CI,704
15
15
  huggingface_hub/fastai_utils.py,sha256=5I7zAfgHJU_mZnxnf9wgWTHrCRu_EAV8VTangDVfE_o,16676
16
- huggingface_hub/file_download.py,sha256=sukzPZVb3cWrnS3tmy_rzpVYJBHXnHKMvoIB1L4h1YU,77567
16
+ huggingface_hub/file_download.py,sha256=DmOEVmhEsRnX8M0kZmLPLC76eptMT0riTwtThFioV8Q,77476
17
17
  huggingface_hub/hf_api.py,sha256=GeE8iATWVij6Ntr9H0jlhRiOYEOsYAtu3ijUMLCPuFI,367572
18
- huggingface_hub/hf_file_system.py,sha256=8-gNR_BsZccS2yfgub4D0GuIk75G4Tu40AJXmdnsOLg,34180
19
- huggingface_hub/hub_mixin.py,sha256=00KjpYISAnTOJ_dbRWyPwsFfhAz--VkAOMtoCKkZRoQ,29815
18
+ huggingface_hub/hf_file_system.py,sha256=JUCT-VZBesDCB-uN__fvQt3uprGQETGnUlzjC7StQLM,37272
19
+ huggingface_hub/hub_mixin.py,sha256=JSQOLPNjj58AbK8ZL7mQ5sqyYp7kM7_QOd_J4LwtxMg,30157
20
20
  huggingface_hub/inference_api.py,sha256=UXOKu_Ez2I3hDsjguqCcCrj03WFDndehpngYiIAucdg,8331
21
21
  huggingface_hub/keras_mixin.py,sha256=8L0FEIWy_kmKsGI5d61q_33dGYbmLGhy4kZbqn-YFns,19681
22
22
  huggingface_hub/lfs.py,sha256=sXSd48kBIaPQHM19I01nA7ShdlhfDVsY5XliKZWP47s,19402
@@ -105,9 +105,9 @@ huggingface_hub/utils/insecure_hashlib.py,sha256=OjxlvtSQHpbLp9PWSrXBDJ0wHjxCBU-
105
105
  huggingface_hub/utils/logging.py,sha256=Cp03s0uEl3kDM9XHQW9a8GAoExODQ-e7kEtgMt-_To8,4728
106
106
  huggingface_hub/utils/sha.py,sha256=QLlIwPCyz46MmUc_4L8xl87KfYoBks9kPgsMZ5JCz-o,902
107
107
  huggingface_hub/utils/tqdm.py,sha256=2H80n_kDpvp7P4i7MaYR47t41i0l6ODi5mab1oof1dk,6335
108
- huggingface_hub-0.22.0rc0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
109
- huggingface_hub-0.22.0rc0.dist-info/METADATA,sha256=YAhHQyqZw4L6cVKuJhFY23COccMc_NyhgmK8Tt5nRj8,12872
110
- huggingface_hub-0.22.0rc0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
111
- huggingface_hub-0.22.0rc0.dist-info/entry_points.txt,sha256=Y3Z2L02rBG7va_iE6RPXolIgwOdwUFONyRN3kXMxZ0g,131
112
- huggingface_hub-0.22.0rc0.dist-info/top_level.txt,sha256=8KzlQJAY4miUvjAssOAJodqKOw3harNzuiwGQ9qLSSk,16
113
- huggingface_hub-0.22.0rc0.dist-info/RECORD,,
108
+ huggingface_hub-0.22.0rc1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
109
+ huggingface_hub-0.22.0rc1.dist-info/METADATA,sha256=OP7t0L-2KcKp-C6u3ObdOYX_NiAz85CUclxheDOoQrU,12872
110
+ huggingface_hub-0.22.0rc1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
111
+ huggingface_hub-0.22.0rc1.dist-info/entry_points.txt,sha256=Y3Z2L02rBG7va_iE6RPXolIgwOdwUFONyRN3kXMxZ0g,131
112
+ huggingface_hub-0.22.0rc1.dist-info/top_level.txt,sha256=8KzlQJAY4miUvjAssOAJodqKOw3harNzuiwGQ9qLSSk,16
113
+ huggingface_hub-0.22.0rc1.dist-info/RECORD,,