huggingface-hub 0.26.3__py3-none-any.whl → 0.27.0rc1__py3-none-any.whl

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release: this version of huggingface-hub might be problematic.

Files changed (61)
  1. huggingface_hub/__init__.py +49 -23
  2. huggingface_hub/_commit_scheduler.py +30 -4
  3. huggingface_hub/_local_folder.py +0 -4
  4. huggingface_hub/_login.py +38 -54
  5. huggingface_hub/_snapshot_download.py +6 -3
  6. huggingface_hub/_tensorboard_logger.py +2 -3
  7. huggingface_hub/_upload_large_folder.py +1 -1
  8. huggingface_hub/errors.py +19 -0
  9. huggingface_hub/fastai_utils.py +3 -2
  10. huggingface_hub/file_download.py +10 -12
  11. huggingface_hub/hf_api.py +102 -498
  12. huggingface_hub/hf_file_system.py +274 -35
  13. huggingface_hub/hub_mixin.py +5 -25
  14. huggingface_hub/inference/_client.py +185 -136
  15. huggingface_hub/inference/_common.py +2 -2
  16. huggingface_hub/inference/_generated/_async_client.py +186 -137
  17. huggingface_hub/inference/_generated/types/__init__.py +31 -10
  18. huggingface_hub/inference/_generated/types/audio_classification.py +3 -5
  19. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +6 -9
  20. huggingface_hub/inference/_generated/types/chat_completion.py +8 -5
  21. huggingface_hub/inference/_generated/types/depth_estimation.py +1 -1
  22. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -6
  23. huggingface_hub/inference/_generated/types/feature_extraction.py +1 -1
  24. huggingface_hub/inference/_generated/types/fill_mask.py +2 -4
  25. huggingface_hub/inference/_generated/types/image_classification.py +3 -5
  26. huggingface_hub/inference/_generated/types/image_segmentation.py +2 -4
  27. huggingface_hub/inference/_generated/types/image_to_image.py +2 -4
  28. huggingface_hub/inference/_generated/types/image_to_text.py +6 -9
  29. huggingface_hub/inference/_generated/types/object_detection.py +2 -4
  30. huggingface_hub/inference/_generated/types/question_answering.py +2 -4
  31. huggingface_hub/inference/_generated/types/sentence_similarity.py +1 -1
  32. huggingface_hub/inference/_generated/types/summarization.py +2 -4
  33. huggingface_hub/inference/_generated/types/table_question_answering.py +21 -3
  34. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -4
  35. huggingface_hub/inference/_generated/types/text_classification.py +4 -10
  36. huggingface_hub/inference/_generated/types/text_to_audio.py +7 -10
  37. huggingface_hub/inference/_generated/types/text_to_image.py +2 -4
  38. huggingface_hub/inference/_generated/types/text_to_speech.py +7 -10
  39. huggingface_hub/inference/_generated/types/token_classification.py +11 -12
  40. huggingface_hub/inference/_generated/types/translation.py +2 -4
  41. huggingface_hub/inference/_generated/types/video_classification.py +3 -4
  42. huggingface_hub/inference/_generated/types/visual_question_answering.py +2 -5
  43. huggingface_hub/inference/_generated/types/zero_shot_classification.py +8 -18
  44. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +9 -19
  45. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +7 -9
  46. huggingface_hub/keras_mixin.py +3 -2
  47. huggingface_hub/lfs.py +2 -5
  48. huggingface_hub/repocard_data.py +4 -4
  49. huggingface_hub/serialization/__init__.py +2 -0
  50. huggingface_hub/serialization/_dduf.py +387 -0
  51. huggingface_hub/serialization/_torch.py +407 -25
  52. huggingface_hub/utils/_cache_manager.py +1 -1
  53. huggingface_hub/utils/_headers.py +9 -25
  54. huggingface_hub/utils/tqdm.py +15 -0
  55. {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/METADATA +8 -3
  56. {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/RECORD +60 -60
  57. huggingface_hub/_multi_commits.py +0 -306
  58. {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/LICENSE +0 -0
  59. {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/WHEEL +0 -0
  60. {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/entry_points.txt +0 -0
  61. {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,387 @@
+ import json
+ import logging
+ import mmap
+ import os
+ import shutil
+ import zipfile
+ from contextlib import contextmanager
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Any, Dict, Generator, Iterable, Tuple, Union
+
+ from ..errors import DDUFCorruptedFileError, DDUFExportError, DDUFInvalidEntryNameError
+
+
+ logger = logging.getLogger(__name__)
+
+ DDUF_ALLOWED_ENTRIES = {
+     # Allowed file extensions in a DDUF file
+     ".json",
+     ".model",
+     ".safetensors",
+     ".txt",
+ }
+
+ DDUF_FOLDER_REQUIRED_ENTRIES = {
+     # Each folder must contain at least one of these entries
+     "config.json",
+     "tokenizer_config.json",
+     "preprocessor_config.json",
+     "scheduler_config.json",
+ }
+
+
+ @dataclass
+ class DDUFEntry:
+     """Object representing a file entry in a DDUF file.
+
+     See [`read_dduf_file`] for how to read a DDUF file.
+
+     Attributes:
+         filename (str):
+             The name of the file in the DDUF archive.
+         offset (int):
+             The offset of the file in the DDUF archive.
+         length (int):
+             The length of the file in the DDUF archive.
+         dduf_path (str):
+             The path to the DDUF archive (for internal use).
+     """
+
+     filename: str
+     length: int
+     offset: int
+
+     dduf_path: Path = field(repr=False)
+
+     @contextmanager
+     def as_mmap(self) -> Generator[bytes, None, None]:
+         """Open the file as a memory-mapped file.
+
+         Useful to load safetensors directly from the file.
+
+         Example:
+             ```py
+             >>> import safetensors.torch
+             >>> with entry.as_mmap() as mm:
+             ...     tensors = safetensors.torch.load(mm)
+             ```
+         """
+         with self.dduf_path.open("rb") as f:
+             with mmap.mmap(f.fileno(), length=0, access=mmap.ACCESS_READ) as mm:
+                 yield mm[self.offset : self.offset + self.length]
+
+     def read_text(self, encoding: str = "utf-8") -> str:
+         """Read the file as text.
+
+         Useful for '.txt' and '.json' entries.
+
+         Example:
+             ```py
+             >>> import json
+             >>> index = json.loads(entry.read_text())
+             ```
+         """
+         with self.dduf_path.open("rb") as f:
+             f.seek(self.offset)
+             return f.read(self.length).decode(encoding=encoding)
+
+
+ def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
+     """
+     Read a DDUF file and return a dictionary of entries.
+
+     Only the metadata is read; the data is not loaded in memory.
+
+     Args:
+         dduf_path (`str` or `os.PathLike`):
+             The path to the DDUF file to read.
+
+     Returns:
+         `Dict[str, DDUFEntry]`:
+             A dictionary of [`DDUFEntry`] indexed by filename.
+
+     Raises:
+         - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
+
+     Example:
+         ```python
+         >>> import json
+         >>> import safetensors.torch
+         >>> from huggingface_hub import read_dduf_file
+
+         # Read DDUF metadata
+         >>> dduf_entries = read_dduf_file("FLUX.1-dev.dduf")
+
+         # Returns a mapping filename <> DDUFEntry
+         >>> dduf_entries["model_index.json"]
+         DDUFEntry(filename='model_index.json', offset=66, length=587)
+
+         # Load model index as JSON
+         >>> json.loads(dduf_entries["model_index.json"].read_text())
+         {'_class_name': 'FluxPipeline', '_diffusers_version': '0.32.0.dev0', '_name_or_path': 'black-forest-labs/FLUX.1-dev', ...
+
+         # Load VAE weights using safetensors
+         >>> with dduf_entries["vae/diffusion_pytorch_model.safetensors"].as_mmap() as mm:
+         ...     state_dict = safetensors.torch.load(mm)
+         ```
+     """
+     entries = {}
+     dduf_path = Path(dduf_path)
+     logger.info(f"Reading DDUF file {dduf_path}")
+     with zipfile.ZipFile(str(dduf_path), "r") as zf:
+         for info in zf.infolist():
+             logger.debug(f"Reading entry {info.filename}")
+             if info.compress_type != zipfile.ZIP_STORED:
+                 raise DDUFCorruptedFileError("Data must not be compressed in DDUF file.")
+
+             try:
+                 _validate_dduf_entry_name(info.filename)
+             except DDUFInvalidEntryNameError as e:
+                 raise DDUFCorruptedFileError(f"Invalid entry name in DDUF file: {info.filename}") from e
+
+             offset = _get_data_offset(zf, info)
+
+             entries[info.filename] = DDUFEntry(
+                 filename=info.filename, offset=offset, length=info.file_size, dduf_path=dduf_path
+             )
+
+     # Consistency checks on the DDUF file
+     if "model_index.json" not in entries:
+         raise DDUFCorruptedFileError("Missing required 'model_index.json' entry in DDUF file.")
+     index = json.loads(entries["model_index.json"].read_text())
+     _validate_dduf_structure(index, entries.keys())
+
+     logger.info(f"Done reading DDUF file {dduf_path}. Found {len(entries)} entries")
+     return entries
+
+
+ def export_entries_as_dduf(
+     dduf_path: Union[str, os.PathLike], entries: Iterable[Tuple[str, Union[str, Path, bytes]]]
+ ) -> None:
+     """Write a DDUF file from an iterable of entries.
+
+     This is a lower-level helper than [`export_folder_as_dduf`] that allows more flexibility when serializing data.
+     In particular, you don't need to save the data on disk before exporting it in the DDUF file.
+
+     Args:
+         dduf_path (`str` or `os.PathLike`):
+             The path to the DDUF file to write.
+         entries (`Iterable[Tuple[str, Union[str, Path, bytes]]]`):
+             An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
+             The filename should be the path to the file in the DDUF archive.
+             The content can be a string or a `pathlib.Path` pointing to a file on the local disk, or the raw content as bytes.
+
+     Raises:
+         - [`DDUFExportError`]: If anything goes wrong during the export (e.g. invalid entry name, missing 'model_index.json', etc.).
+
+     Example:
+         ```python
+         # Export specific files from the local disk.
+         >>> from huggingface_hub import export_entries_as_dduf
+         >>> export_entries_as_dduf(
+         ...     dduf_path="stable-diffusion-v1-4-FP16.dduf",
+         ...     entries=[  # List entries to add to the DDUF file (here, only FP16 weights)
+         ...         ("model_index.json", "path/to/model_index.json"),
+         ...         ("vae/config.json", "path/to/vae/config.json"),
+         ...         ("vae/diffusion_pytorch_model.fp16.safetensors", "path/to/vae/diffusion_pytorch_model.fp16.safetensors"),
+         ...         ("text_encoder/config.json", "path/to/text_encoder/config.json"),
+         ...         ("text_encoder/model.fp16.safetensors", "path/to/text_encoder/model.fp16.safetensors"),
+         ...         # ... add more entries here
+         ...     ]
+         ... )
+         ```
+
+         ```python
+         # Export state_dicts one by one from a loaded pipeline
+         >>> from diffusers import DiffusionPipeline
+         >>> from typing import Generator, Tuple
+         >>> import safetensors.torch
+         >>> from huggingface_hub import export_entries_as_dduf
+         >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
+         ... # ... do some work with the pipeline
+
+         >>> def as_entries(pipe: DiffusionPipeline) -> Generator[Tuple[str, bytes], None, None]:
+         ...     # Build a generator that yields the entries to add to the DDUF file.
+         ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
+         ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
+         ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
+         ...     yield "vae/diffusion_pytorch_model.safetensors", safetensors.torch.save(pipe.vae.state_dict())
+         ...     yield "text_encoder/config.json", pipe.text_encoder.config.to_json_string().encode()
+         ...     yield "text_encoder/model.safetensors", safetensors.torch.save(pipe.text_encoder.state_dict())
+         ...     # ... add more entries here
+
+         >>> export_entries_as_dduf(dduf_path="stable-diffusion-v1-4.dduf", entries=as_entries(pipe))
+         ```
+     """
+     logger.info(f"Exporting DDUF file '{dduf_path}'")
+     filenames = set()
+     index = None
+     with zipfile.ZipFile(str(dduf_path), "w", zipfile.ZIP_STORED) as archive:
+         for filename, content in entries:
+             if filename in filenames:
+                 raise DDUFExportError(f"Can't add duplicate entry: {filename}")
+             filenames.add(filename)
+
+             if filename == "model_index.json":
+                 try:
+                     index = json.loads(_load_content(content).decode())
+                 except json.JSONDecodeError as e:
+                     raise DDUFExportError("Failed to parse 'model_index.json'.") from e
+
+             try:
+                 filename = _validate_dduf_entry_name(filename)
+             except DDUFInvalidEntryNameError as e:
+                 raise DDUFExportError(f"Invalid entry name: {filename}") from e
+             logger.debug(f"Adding entry '{filename}' to DDUF file")
+             _dump_content_in_archive(archive, filename, content)
+
+     # Consistency checks on the DDUF file
+     if index is None:
+         raise DDUFExportError("Missing required 'model_index.json' entry in DDUF file.")
+     try:
+         _validate_dduf_structure(index, filenames)
+     except DDUFCorruptedFileError as e:
+         raise DDUFExportError("Invalid DDUF file structure.") from e
+
+     logger.info(f"Done writing DDUF file {dduf_path}")
+
+
+ def export_folder_as_dduf(dduf_path: Union[str, os.PathLike], folder_path: Union[str, os.PathLike]) -> None:
+     """
+     Export a folder as a DDUF file.
+
+     Uses [`export_entries_as_dduf`] under the hood.
+
+     Args:
+         dduf_path (`str` or `os.PathLike`):
+             The path to the DDUF file to write.
+         folder_path (`str` or `os.PathLike`):
+             The path to the folder containing the diffusion model.
+
+     Example:
+         ```python
+         >>> from huggingface_hub import export_folder_as_dduf
+         >>> export_folder_as_dduf(dduf_path="FLUX.1-dev.dduf", folder_path="path/to/FLUX.1-dev")
+         ```
+     """
+     folder_path = Path(folder_path)
+
+     def _iterate_over_folder() -> Iterable[Tuple[str, Path]]:
+         for path in Path(folder_path).glob("**/*"):
+             if not path.is_file():
+                 continue
+             if path.suffix not in DDUF_ALLOWED_ENTRIES:
+                 logger.debug(f"Skipping file '{path}' (file type not allowed)")
+                 continue
+             path_in_archive = path.relative_to(folder_path)
+             if len(path_in_archive.parts) >= 3:
+                 logger.debug(f"Skipping file '{path}' (nested directories not allowed)")
+                 continue
+             yield path_in_archive.as_posix(), path
+
+     export_entries_as_dduf(dduf_path, _iterate_over_folder())
+
+
+ def _dump_content_in_archive(archive: zipfile.ZipFile, filename: str, content: Union[str, os.PathLike, bytes]) -> None:
+     with archive.open(filename, "w", force_zip64=True) as archive_fh:
+         if isinstance(content, (str, Path)):
+             content_path = Path(content)
+             with content_path.open("rb") as content_fh:
+                 shutil.copyfileobj(content_fh, archive_fh, 1024 * 1024 * 8)  # type: ignore[misc]
+         elif isinstance(content, bytes):
+             archive_fh.write(content)
+         else:
+             raise DDUFExportError(f"Invalid content type for {filename}. Must be str, Path or bytes.")
+
+
+ def _load_content(content: Union[str, Path, bytes]) -> bytes:
+     """Load the content of an entry as bytes.
+
+     Used only for small checks (not to dump content into archive).
+     """
+     if isinstance(content, (str, Path)):
+         return Path(content).read_bytes()
+     elif isinstance(content, bytes):
+         return content
+     else:
+         raise DDUFExportError(f"Invalid content type. Must be str, Path or bytes. Got {type(content)}.")
+
+
+ def _validate_dduf_entry_name(entry_name: str) -> str:
+     if "." + entry_name.split(".")[-1] not in DDUF_ALLOWED_ENTRIES:
+         raise DDUFInvalidEntryNameError(f"File type not allowed: {entry_name}")
+     if "\\" in entry_name:
+         raise DDUFInvalidEntryNameError(f"Entry names must use UNIX separators ('/'). Got {entry_name}.")
+     entry_name = entry_name.strip("/")
+     if entry_name.count("/") > 1:
+         raise DDUFInvalidEntryNameError(f"DDUF only supports 1 level of directory. Got {entry_name}.")
+     return entry_name
+
+
+ def _validate_dduf_structure(index: Any, entry_names: Iterable[str]) -> None:
+     """
+     Consistency checks on the DDUF file structure.
+
+     Rules:
+     - The 'model_index.json' entry is required and must contain a dictionary.
+     - Each folder name must correspond to an entry in 'model_index.json'.
+     - Each folder must contain at least a config file ('config.json', 'tokenizer_config.json', 'preprocessor_config.json', 'scheduler_config.json').
+
+     Args:
+         index (Any):
+             The content of the 'model_index.json' entry.
+         entry_names (Iterable[str]):
+             The list of entry names in the DDUF file.
+
+     Raises:
+         - [`DDUFCorruptedFileError`]: If the DDUF file is corrupted (i.e. doesn't follow the DDUF format).
+     """
+     if not isinstance(index, dict):
+         raise DDUFCorruptedFileError(f"Invalid 'model_index.json' content. Must be a dictionary. Got {type(index)}.")
+
+     dduf_folders = {entry.split("/")[0] for entry in entry_names if "/" in entry}
+     for folder in dduf_folders:
+         if folder not in index:
+             raise DDUFCorruptedFileError(f"Missing required entry '{folder}' in 'model_index.json'.")
+         if not any(f"{folder}/{required_entry}" in entry_names for required_entry in DDUF_FOLDER_REQUIRED_ENTRIES):
+             raise DDUFCorruptedFileError(
+                 f"Missing required file in folder '{folder}'. Must contain at least one of {DDUF_FOLDER_REQUIRED_ENTRIES}."
+             )
+
+
+ def _get_data_offset(zf: zipfile.ZipFile, info: zipfile.ZipInfo) -> int:
+     """
+     Calculate the data offset for a file in a ZIP archive.
+
+     Args:
+         zf (`zipfile.ZipFile`):
+             The opened ZIP file. Must be opened in read mode.
+         info (`zipfile.ZipInfo`):
+             The file info.
+
+     Returns:
+         int: The offset of the file data in the ZIP archive.
+     """
+     if zf.fp is None:
+         raise DDUFCorruptedFileError("ZipFile object must be opened in read mode.")
+
+     # Step 1: Get the local file header offset
+     header_offset = info.header_offset
+
+     # Step 2: Read the local file header
+     zf.fp.seek(header_offset)
+     local_file_header = zf.fp.read(30)  # Fixed-size part of the local header
+
+     if len(local_file_header) < 30:
+         raise DDUFCorruptedFileError("Incomplete local file header.")
+
+     # Step 3: Parse the header fields to calculate the start of file data
+     # Local file header: https://en.wikipedia.org/wiki/ZIP_(file_format)#File_headers
+     filename_len = int.from_bytes(local_file_header[26:28], "little")
+     extra_field_len = int.from_bytes(local_file_header[28:30], "little")
+
+     # Data offset is after the fixed header, filename, and extra fields
+     data_offset = header_offset + 30 + filename_len + extra_field_len
+
+     return data_offset
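
Taken together, the new module gives a write path (`export_entries_as_dduf` / `export_folder_as_dduf`) and a read path (`read_dduf_file`) for the DDUF format. The following is a minimal round-trip sketch, not taken from the package docs: the `demo.dduf` filename and the toy `model_index.json` content are hypothetical, chosen only to satisfy the structure rules enforced by `_validate_dduf_structure` (a top-level `model_index.json` dictionary plus one required config file per component folder).

```python
import json

from huggingface_hub import export_entries_as_dduf, read_dduf_file

# Write: entries are (name_in_archive, content) pairs; content may be a
# local file path or raw bytes. "demo.dduf" and the index below are
# hypothetical placeholders for illustration.
index = {"unet": ["diffusers", "UNet2DConditionModel"]}
export_entries_as_dduf(
    dduf_path="demo.dduf",
    entries=[
        ("model_index.json", json.dumps(index).encode()),
        ("unet/config.json", b"{}"),  # satisfies the per-folder config rule
    ],
)

# Read: only ZIP metadata is parsed; entry data stays on disk until accessed
# via read_text() or as_mmap().
entries = read_dduf_file("demo.dduf")
print(entries["unet/config.json"].read_text())  # "{}"
```

The zero-copy read path works because entries are stored uncompressed (`ZIP_STORED`), so `_get_data_offset` can locate the raw bytes by skipping the 30-byte fixed local file header plus the variable-length filename and extra fields. As an independent sanity check of that arithmetic (a sketch using only the standard library, not part of the package), the same offset can be recomputed by hand:

```python
import struct
import zipfile

# Recompute the data offset of the first entry by parsing the local file
# header directly: bytes 26:28 hold the filename length, 28:30 the extra
# field length, both little-endian unsigned shorts.
with zipfile.ZipFile("demo.dduf") as zf:
    info = zf.infolist()[0]
with open("demo.dduf", "rb") as f:
    f.seek(info.header_offset)
    header = f.read(30)
name_len, extra_len = struct.unpack("<HH", header[26:30])
offset = info.header_offset + 30 + name_len + extra_len
print(offset)  # matches what _get_data_offset would return for this entry
```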