huggingface-hub 0.23.0-py3-none-any.whl → 0.23.2-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registry.


--- a/huggingface_hub/__init__.py
+++ b/huggingface_hub/__init__.py
@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING
 
 
-__version__ = "0.23.0"
+__version__ = "0.23.2"
 
 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
--- a/huggingface_hub/hf_file_system.py
+++ b/huggingface_hub/hf_file_system.py
@@ -517,6 +517,9 @@ class HfFileSystem(fsspec.AbstractFileSystem):
 else:
     out = None
 parent_path = self._parent(path)
+if not expand_info and parent_path not in self.dircache:
+    # Fill the cache with cheap call
+    self.ls(parent_path, expand_info=False)
 if parent_path in self.dircache:
     # Check if the path is in the cache
     out1 = [o for o in self.dircache[parent_path] if o["name"] == path]
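
The added lines make `HfFileSystem.info(..., expand_info=False)` warm the directory cache with a single cheap `ls()` call, so later lookups in the same directory are served from `dircache` instead of issuing new Hub requests. A minimal sketch of the effect (the repo and file names are illustrative, not from this diff):

```py
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

# First lookup triggers one cheap ls() on the parent directory,
# which fills fs.dircache for the "gpt2" repo root.
fs.info("gpt2/config.json", expand_info=False)

# Answered from fs.dircache: no extra round-trip to the Hub.
fs.info("gpt2/merges.txt", expand_info=False)
```
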
@@ -681,6 +684,9 @@ class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
             f"{e}.\nMake sure the repository and revision exist before writing data."
         ) from e
     raise
+# avoid an unnecessary .info() call with expensive expand_info=True to instantiate .details
+if kwargs.get("mode", "rb") == "rb":
+    self.details = fs.info(self.resolved_path.unresolve(), expand_info=False)
 super().__init__(fs, self.resolved_path.unresolve(), **kwargs)
 self.fs: HfFileSystem
 
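
With this change, opening a file in the default `"rb"` mode pre-fills `self.details` via a cheap `expand_info=False` lookup, instead of letting `AbstractBufferedFile.__init__` trigger a full `.info()` with the expensive metadata expansion (e.g. last-commit info). A usage sketch, again with an illustrative repo path:

```py
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

# Read mode ("rb") now resolves file metadata with expand_info=False,
# so the open itself stays cheap.
with fs.open("gpt2/config.json", "rb") as f:
    config = f.read()
```
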
--- a/huggingface_hub/serialization/_base.py
+++ b/huggingface_hub/serialization/_base.py
@@ -14,7 +14,7 @@
 """Contains helpers to split tensors into shards."""
 
 from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List, Optional, TypeVar
+from typing import Any, Callable, Dict, List, Optional, TypeVar, Union
 
 from .. import logging
 
@@ -46,7 +46,7 @@ def split_state_dict_into_shards_factory(
     get_tensor_size: TensorSizeFn_T,
     get_storage_id: StorageIDFn_T = lambda tensor: None,
     filename_pattern: str = FILENAME_PATTERN,
-    max_shard_size: int = MAX_SHARD_SIZE,
+    max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
 ) -> StateDictSplit:
     """
     Split a model state dictionary in shards so that each shard is smaller than a given size.
@@ -89,6 +89,9 @@ def split_state_dict_into_shards_factory(
 current_shard_size = 0
 total_size = 0
 
+if isinstance(max_shard_size, str):
+    max_shard_size = parse_size_to_int(max_shard_size)
+
 for key, tensor in state_dict.items():
     # when bnb serialization is used the weights in the state dict can be strings
     # check: https://github.com/huggingface/transformers/pull/24416 for more details
@@ -167,3 +170,44 @@ def split_state_dict_into_shards_factory(
         filename_to_tensors=filename_to_tensors,
         tensor_to_filename=tensor_name_to_filename,
     )
+
+
+SIZE_UNITS = {
+    "TB": 10**12,
+    "GB": 10**9,
+    "MB": 10**6,
+    "KB": 10**3,
+}
+
+
+def parse_size_to_int(size_as_str: str) -> int:
+    """
+    Parse a size expressed as a string with digits and unit (like `"5MB"`) to an integer (in bytes).
+
+    Supported units are "TB", "GB", "MB", "KB".
+
+    Args:
+        size_as_str (`str`): The size to convert. Will be directly returned if an `int`.
+
+    Example:
+
+    ```py
+    >>> parse_size_to_int("5MB")
+    5000000
+    ```
+    """
+    size_as_str = size_as_str.strip()
+
+    # Parse unit
+    unit = size_as_str[-2:].upper()
+    if unit not in SIZE_UNITS:
+        raise ValueError(f"Unit '{unit}' not supported. Supported units are TB, GB, MB, KB. Got '{size_as_str}'.")
+    multiplier = SIZE_UNITS[unit]
+
+    # Parse value
+    try:
+        value = float(size_as_str[:-2].strip())
+    except ValueError as e:
+        raise ValueError(f"Could not parse the size value from '{size_as_str}': {e}") from e
+
+    return int(value * multiplier)
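
Together with the `Union[int, str]` signature changes below, this new helper lets callers pass `max_shard_size` as a human-readable string. A short sketch of the new calling convention (the toy state dict is illustrative, and assumes the top-level re-export of the torch splitter available in 0.23):

```py
import torch

from huggingface_hub import split_torch_state_dict_into_shards

state_dict = {"weight": torch.zeros(4096, 4096)}  # toy example

# Equivalent after this change: "1GB" is parsed by parse_size_to_int to 10**9 bytes.
split_a = split_torch_state_dict_into_shards(state_dict, max_shard_size="1GB")
split_b = split_torch_state_dict_into_shards(state_dict, max_shard_size=10**9)
assert split_a.filename_to_tensors == split_b.filename_to_tensors
```
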
--- a/huggingface_hub/serialization/_numpy.py
+++ b/huggingface_hub/serialization/_numpy.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """Contains numpy-specific helpers."""
 
-from typing import TYPE_CHECKING, Dict
+from typing import TYPE_CHECKING, Dict, Union
 
 from ._base import FILENAME_PATTERN, MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
 
@@ -26,7 +26,7 @@ def split_numpy_state_dict_into_shards(
     state_dict: Dict[str, "np.ndarray"],
     *,
     filename_pattern: str = FILENAME_PATTERN,
-    max_shard_size: int = MAX_SHARD_SIZE,
+    max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
 ) -> StateDictSplit:
     """
     Split a model state dictionary in shards so that each shard is smaller than a given size.
--- a/huggingface_hub/serialization/_tensorflow.py
+++ b/huggingface_hub/serialization/_tensorflow.py
@@ -15,7 +15,7 @@
 
 import math
 import re
-from typing import TYPE_CHECKING, Dict
+from typing import TYPE_CHECKING, Dict, Union
 
 from ._base import MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
 
@@ -28,7 +28,7 @@ def split_tf_state_dict_into_shards(
     state_dict: Dict[str, "tf.Tensor"],
     *,
     filename_pattern: str = "tf_model{suffix}.h5",
-    max_shard_size: int = MAX_SHARD_SIZE,
+    max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
 ) -> StateDictSplit:
     """
     Split a model state dictionary in shards so that each shard is smaller than a given size.
--- a/huggingface_hub/serialization/_torch.py
+++ b/huggingface_hub/serialization/_torch.py
@@ -15,7 +15,7 @@
 
 import importlib
 from functools import lru_cache
-from typing import TYPE_CHECKING, Dict, Tuple
+from typing import TYPE_CHECKING, Dict, Tuple, Union
 
 from ._base import FILENAME_PATTERN, MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
 
@@ -28,7 +28,7 @@ def split_torch_state_dict_into_shards(
     state_dict: Dict[str, "torch.Tensor"],
     *,
     filename_pattern: str = FILENAME_PATTERN,
-    max_shard_size: int = MAX_SHARD_SIZE,
+    max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
 ) -> StateDictSplit:
     """
     Split a model state dictionary in shards so that each shard is smaller than a given size.
@@ -67,7 +67,7 @@ def split_torch_state_dict_into_shards(
 
     >>> def save_state_dict(state_dict: Dict[str, torch.Tensor], save_directory: str):
     ...     state_dict_split = split_torch_state_dict_into_shards(state_dict)
-    ...     for filename, tensors in state_dict_split.filename_to_tensors.values():
+    ...     for filename, tensors in state_dict_split.filename_to_tensors.items():
     ...         shard = {tensor: state_dict[tensor] for tensor in tensors}
     ...         safe_save_file(
     ...             shard,
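
This is a docstring bug fix: `filename_to_tensors.values()` yields only the tensor-name lists, so unpacking each into `(filename, tensors)` would fail; `.items()` is the correct call. For reference, a runnable version of the fixed example (assumes `safetensors` is installed and `save_directory` exists):

```py
import os
from typing import Dict

import torch
from safetensors.torch import save_file as safe_save_file

from huggingface_hub import split_torch_state_dict_into_shards


def save_state_dict(state_dict: Dict[str, torch.Tensor], save_directory: str):
    state_dict_split = split_torch_state_dict_into_shards(state_dict)
    # .items() yields (filename, tensor_names) pairs, as the fix intends.
    for filename, tensors in state_dict_split.filename_to_tensors.items():
        shard = {name: state_dict[name] for name in tensors}
        safe_save_file(shard, os.path.join(save_directory, filename), metadata={"format": "pt"})
```
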
--- a/huggingface_hub-0.23.0.dist-info/METADATA
+++ b/huggingface_hub-0.23.2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: huggingface-hub
-Version: 0.23.0
+Version: 0.23.2
 Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
 Home-page: https://github.com/huggingface/huggingface_hub
 Author: Hugging Face, Inc.
--- a/huggingface_hub-0.23.0.dist-info/RECORD
+++ b/huggingface_hub-0.23.2.dist-info/RECORD
@@ -1,4 +1,4 @@
-huggingface_hub/__init__.py,sha256=xXb4llMkyN0SvoFdcDDPqiHEiI3C-sZsSHQYLo3NK04,32692
+huggingface_hub/__init__.py,sha256=Q1uTvSPbnh_bO8QcgwbIbiQgB-XsnDbGwa7tpWxNl7w,32692
 huggingface_hub/_commit_api.py,sha256=Z1sQnJx1xWfspsX6vS8eGTmr-9QujIoItjbnJVVyyCQ,29299
 huggingface_hub/_commit_scheduler.py,sha256=nlJS_vnLb8i92NLrRwJX8Mg9QZ7f3kfLbLlQuEd5YjU,13647
 huggingface_hub/_inference_endpoints.py,sha256=rBx6xgnSJq0JtntF1_zphj7NsCmduICqgZfmvscdE_w,15667
@@ -16,7 +16,7 @@ huggingface_hub/errors.py,sha256=IM0lNbExLzaYEs0HrrPvY4-kyj6DiP2Szu7Jy9slHOE,208
 huggingface_hub/fastai_utils.py,sha256=5I7zAfgHJU_mZnxnf9wgWTHrCRu_EAV8VTangDVfE_o,16676
 huggingface_hub/file_download.py,sha256=n5ovYqh1-xe3ptRHuS-EXn6X_-3ZVI7C-pQrHD45DtA,82236
 huggingface_hub/hf_api.py,sha256=hyMkURhYXalCNG4Qqx3PhN7Ucru8m18ZidEok_T2504,375216
-huggingface_hub/hf_file_system.py,sha256=r7NGKIIF0o2GaFn_qZzvoCGi6Vyhc3BH8wcFGFztyCw,37425
+huggingface_hub/hf_file_system.py,sha256=EHSWD6Pdm9ED-cgNh-ozoiz69pODssKrObKybVJPBQA,37830
 huggingface_hub/hub_mixin.py,sha256=ktwuDqSXFU2q2_xj676R-zag_tB3QEiMMVFueJ3YD9g,34644
 huggingface_hub/inference_api.py,sha256=UXOKu_Ez2I3hDsjguqCcCrj03WFDndehpngYiIAucdg,8331
 huggingface_hub/keras_mixin.py,sha256=2DF-hNGdxJCxqvcw46id-ExH_865ZAXsJd2vmpAuWHQ,19484
@@ -74,10 +74,10 @@ huggingface_hub/inference/_generated/types/zero_shot_classification.py,sha256=u6
 huggingface_hub/inference/_generated/types/zero_shot_image_classification.py,sha256=qVH6Ms0FjF8TraGy4BYiS8lmvGq9xiIDdXqGFynLHMA,1689
 huggingface_hub/inference/_generated/types/zero_shot_object_detection.py,sha256=PU4OOlQ2aAOosW2JlG2Z27MEQpmE6BxcygH_ns3w1KQ,1662
 huggingface_hub/serialization/__init__.py,sha256=W74TaCtYnMfpvGEQr1SS-OBmqPUFnM9AeWT9hTJCG9Y,910
-huggingface_hub/serialization/_base.py,sha256=AgO-16i-vyosbERnLSCFYgaXbVqQDM7xfIne8gsWrLQ,7133
-huggingface_hub/serialization/_numpy.py,sha256=idULJp1js6L6E8o-MiGVqNa4lBfXS2cfAmqivnpsaYs,2671
-huggingface_hub/serialization/_tensorflow.py,sha256=Rf4kw1NYxEaoUXB8aLtQLHrTjgobaEAJdzO0w0kbP58,3559
-huggingface_hub/serialization/_torch.py,sha256=xYR6e_G9laMTroWLiQRABSuloTQuuRSQNyYHdT_rmXU,7687
+huggingface_hub/serialization/_base.py,sha256=2wxdid6ee8RASEKhCkpNdP8Kj9x4dRm6j8h72L1AtFQ,8239
+huggingface_hub/serialization/_numpy.py,sha256=E-boJoUuDdyMTaAMRIiHha7F9GH9YhFNQPEioG4UkaY,2690
+huggingface_hub/serialization/_tensorflow.py,sha256=4Wf_wzmLSzZua9hGGmArfngDzz3yw19PWJMdTT76uxc,3578
+huggingface_hub/serialization/_torch.py,sha256=t-pTq4O3NpAprVJIojtC8Rq-kNJ889IluJtJtoLoqVk,7705
 huggingface_hub/templates/datasetcard_template.md,sha256=W-EMqR6wndbrnZorkVv56URWPG49l7MATGeI015kTvs,5503
 huggingface_hub/templates/modelcard_template.md,sha256=4AqArS3cqdtbit5Bo-DhjcnDFR-pza5hErLLTPM4Yuc,6870
 huggingface_hub/utils/__init__.py,sha256=44yhxTtWsuMGrZcALK-3UuVazGBtc94z9nZwLmLnu8w,3589
@@ -107,9 +107,9 @@ huggingface_hub/utils/insecure_hashlib.py,sha256=OjxlvtSQHpbLp9PWSrXBDJ0wHjxCBU-
 huggingface_hub/utils/logging.py,sha256=Cp03s0uEl3kDM9XHQW9a8GAoExODQ-e7kEtgMt-_To8,4728
 huggingface_hub/utils/sha.py,sha256=QLlIwPCyz46MmUc_4L8xl87KfYoBks9kPgsMZ5JCz-o,902
 huggingface_hub/utils/tqdm.py,sha256=x35PqUA8bBBztPrqhv87Y_TGl5CdlfBs4pe6k1YyDJ8,9390
-huggingface_hub-0.23.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-huggingface_hub-0.23.0.dist-info/METADATA,sha256=_OWllpyp_iC9C7uiCKcZfHiPBLna6BYgSY3FCx7zr-A,12994
-huggingface_hub-0.23.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-huggingface_hub-0.23.0.dist-info/entry_points.txt,sha256=Y3Z2L02rBG7va_iE6RPXolIgwOdwUFONyRN3kXMxZ0g,131
-huggingface_hub-0.23.0.dist-info/top_level.txt,sha256=8KzlQJAY4miUvjAssOAJodqKOw3harNzuiwGQ9qLSSk,16
-huggingface_hub-0.23.0.dist-info/RECORD,,
+huggingface_hub-0.23.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+huggingface_hub-0.23.2.dist-info/METADATA,sha256=jeheWjcbLyu4hs7FgnFpv_6VOHF6aTNe7X-5TzYEtaA,12994
+huggingface_hub-0.23.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+huggingface_hub-0.23.2.dist-info/entry_points.txt,sha256=Y3Z2L02rBG7va_iE6RPXolIgwOdwUFONyRN3kXMxZ0g,131
+huggingface_hub-0.23.2.dist-info/top_level.txt,sha256=8KzlQJAY4miUvjAssOAJodqKOw3harNzuiwGQ9qLSSk,16
+huggingface_hub-0.23.2.dist-info/RECORD,,