huggingface-hub 1.0.0rc1__py3-none-any.whl → 1.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +4 -7
- huggingface_hub/_commit_api.py +126 -66
- huggingface_hub/_commit_scheduler.py +4 -7
- huggingface_hub/_login.py +10 -16
- huggingface_hub/_snapshot_download.py +119 -21
- huggingface_hub/_tensorboard_logger.py +2 -5
- huggingface_hub/_upload_large_folder.py +1 -2
- huggingface_hub/_webhooks_server.py +8 -20
- huggingface_hub/cli/_cli_utils.py +12 -6
- huggingface_hub/cli/download.py +32 -7
- huggingface_hub/cli/repo.py +137 -5
- huggingface_hub/dataclasses.py +122 -2
- huggingface_hub/errors.py +4 -0
- huggingface_hub/fastai_utils.py +22 -32
- huggingface_hub/file_download.py +234 -38
- huggingface_hub/hf_api.py +385 -424
- huggingface_hub/hf_file_system.py +55 -65
- huggingface_hub/inference/_client.py +27 -48
- huggingface_hub/inference/_generated/_async_client.py +27 -48
- huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
- huggingface_hub/inference/_mcp/agent.py +2 -5
- huggingface_hub/inference/_mcp/mcp_client.py +6 -8
- huggingface_hub/inference/_providers/__init__.py +16 -0
- huggingface_hub/inference/_providers/_common.py +2 -0
- huggingface_hub/inference/_providers/fal_ai.py +2 -0
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +14 -8
- huggingface_hub/repocard.py +12 -16
- huggingface_hub/serialization/_base.py +3 -6
- huggingface_hub/serialization/_torch.py +16 -34
- huggingface_hub/utils/__init__.py +1 -2
- huggingface_hub/utils/_cache_manager.py +42 -72
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_http.py +37 -68
- huggingface_hub/utils/_validators.py +2 -2
- huggingface_hub/utils/logging.py +8 -11
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/METADATA +2 -2
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/RECORD +44 -56
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/_cli_utils.py +0 -74
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -195
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -207
- huggingface_hub/commands/version.py +0 -40
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/LICENSE +0 -0
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/WHEEL +0 -0
- {huggingface_hub-1.0.0rc1.dist-info → huggingface_hub-1.0.0rc3.dist-info}/top_level.txt +0 -0
huggingface_hub/__init__.py
CHANGED

@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING


-__version__ = "1.0.0.rc1"
+__version__ = "1.0.0.rc3"

 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
@@ -138,6 +138,7 @@ _SUBMOD_ATTRS = {
         "push_to_hub_fastai",
     ],
     "file_download": [
+        "DryRunFileInfo",
         "HfFileMetadata",
         "_CACHED_NO_EXIST",
         "get_hf_file_metadata",
@@ -513,8 +514,6 @@ _SUBMOD_ATTRS = {
         "CorruptedCacheException",
         "DeleteCacheStrategy",
         "HFCacheInfo",
-        "HfHubAsyncTransport",
-        "HfHubTransport",
         "cached_assets_path",
         "close_session",
         "dump_environment_info",
@@ -625,6 +624,7 @@ __all__ = [
     "DocumentQuestionAnsweringInputData",
     "DocumentQuestionAnsweringOutputElement",
     "DocumentQuestionAnsweringParameters",
+    "DryRunFileInfo",
     "EvalResult",
     "FLAX_WEIGHTS_NAME",
     "FeatureExtractionInput",
@@ -645,8 +645,6 @@ __all__ = [
     "HfFileSystemFile",
     "HfFileSystemResolvedPath",
     "HfFileSystemStreamFile",
-    "HfHubAsyncTransport",
-    "HfHubTransport",
     "ImageClassificationInput",
     "ImageClassificationOutputElement",
     "ImageClassificationOutputTransform",
@@ -1147,6 +1145,7 @@ if TYPE_CHECKING:  # pragma: no cover
     )
     from .file_download import (
         _CACHED_NO_EXIST,  # noqa: F401
+        DryRunFileInfo,  # noqa: F401
         HfFileMetadata,  # noqa: F401
         get_hf_file_metadata,  # noqa: F401
         hf_hub_download,  # noqa: F401
@@ -1515,8 +1514,6 @@ if TYPE_CHECKING:  # pragma: no cover
         CorruptedCacheException,  # noqa: F401
         DeleteCacheStrategy,  # noqa: F401
         HFCacheInfo,  # noqa: F401
-        HfHubAsyncTransport,  # noqa: F401
-        HfHubTransport,  # noqa: F401
         cached_assets_path,  # noqa: F401
         close_session,  # noqa: F401
         dump_environment_info,  # noqa: F401
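For downstream code, the practical effect of this `__init__.py` change is that `DryRunFileInfo` is now re-exported from the package root while the `HfHubTransport`/`HfHubAsyncTransport` exports are gone. A minimal sketch of what that means at import time (the version string is taken from the diff above; the `ImportError` behaviour assumes the package's usual lazy-export mechanism):

```python
import huggingface_hub

print(huggingface_hub.__version__)  # "1.0.0.rc3" per the diff above

# New top-level export in rc3:
from huggingface_hub import DryRunFileInfo

# Removed exports: importing them is expected to fail on rc3.
try:
    from huggingface_hub import HfHubTransport  # removed from __all__ / _SUBMOD_ATTRS
except ImportError:
    print("HfHubTransport is no longer part of the public API")
```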
huggingface_hub/_commit_api.py
CHANGED

@@ -33,6 +33,7 @@ from .utils import (
     validate_hf_hub_args,
 )
 from .utils import tqdm as hf_tqdm
+from .utils._runtime import is_xet_available


 if TYPE_CHECKING:
@@ -353,7 +354,7 @@ def _warn_on_overwriting_operations(operations: list[CommitOperation]) -> None:


 @validate_hf_hub_args
-def _upload_lfs_files(
+def _upload_files(
     *,
     additions: list[CommitOperationAdd],
     repo_type: str,
@@ -362,6 +363,86 @@ def _upload_lfs_files(
     endpoint: Optional[str] = None,
     num_threads: int = 5,
     revision: Optional[str] = None,
+    create_pr: Optional[bool] = None,
+):
+    """
+    Negotiates per-file transfer (LFS vs Xet) and uploads in batches.
+    """
+    xet_additions: list[CommitOperationAdd] = []
+    lfs_actions: list[dict[str, Any]] = []
+    lfs_oid2addop: dict[str, CommitOperationAdd] = {}
+
+    for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
+        chunk_list = [op for op in chunk]
+
+        transfers: list[str] = ["basic", "multipart"]
+        has_buffered_io_data = any(isinstance(op.path_or_fileobj, io.BufferedIOBase) for op in chunk_list)
+        if is_xet_available():
+            if not has_buffered_io_data:
+                transfers.append("xet")
+            else:
+                logger.warning(
+                    "Uploading files as a binary IO buffer is not supported by Xet Storage. "
+                    "Falling back to HTTP upload."
+                )
+
+        actions_chunk, errors_chunk, chosen_transfer = post_lfs_batch_info(
+            upload_infos=[op.upload_info for op in chunk_list],
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            endpoint=endpoint,
+            headers=headers,
+            token=None,  # already passed in 'headers'
+            transfers=transfers,
+        )
+        if errors_chunk:
+            message = "\n".join(
+                [
+                    f"Encountered error for file with OID {err.get('oid')}: `{err.get('error', {}).get('message')}"
+                    for err in errors_chunk
+                ]
+            )
+            raise ValueError(f"LFS batch API returned errors:\n{message}")

+        # If server returns a transfer we didn't offer (e.g "xet" while uploading from BytesIO),
+        # fall back to LFS for this chunk.
+        if chosen_transfer == "xet" and ("xet" in transfers):
+            xet_additions.extend(chunk_list)
+        else:
+            lfs_actions.extend(actions_chunk)
+            for op in chunk_list:
+                lfs_oid2addop[op.upload_info.sha256.hex()] = op
+
+    if len(lfs_actions) > 0:
+        _upload_lfs_files(
+            actions=lfs_actions,
+            oid2addop=lfs_oid2addop,
+            headers=headers,
+            endpoint=endpoint,
+            num_threads=num_threads,
+        )
+
+    if len(xet_additions) > 0:
+        _upload_xet_files(
+            additions=xet_additions,
+            repo_type=repo_type,
+            repo_id=repo_id,
+            headers=headers,
+            endpoint=endpoint,
+            revision=revision,
+            create_pr=create_pr,
+        )
+
+
+@validate_hf_hub_args
+def _upload_lfs_files(
+    *,
+    actions: list[dict[str, Any]],
+    oid2addop: dict[str, CommitOperationAdd],
+    headers: dict[str, str],
+    endpoint: Optional[str] = None,
+    num_threads: int = 5,
 ):
     """
     Uploads the content of `additions` to the Hub using the large file storage protocol.
@@ -370,9 +451,21 @@ def _upload_lfs_files(
     - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

     Args:
-
-
-
+        actions (`list[dict[str, Any]]`):
+            LFS batch actions returned by the server.
+        oid2addop (`dict[str, CommitOperationAdd]`):
+            A dictionary mapping the OID of the file to the corresponding `CommitOperationAdd` object.
+        headers (`dict[str, str]`):
+            Headers to use for the request, including authorization headers and user agent.
+        endpoint (`str`, *optional*):
+            The endpoint to use for the request. Defaults to `constants.ENDPOINT`.
+        num_threads (`int`, *optional*):
+            The number of concurrent threads to use when uploading. Defaults to 5.
+
+    Raises:
+        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+            If an upload failed for any reason
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated
@@ -392,50 +485,17 @@ def _upload_lfs_files(
         [`HfHubHTTPError`]
             If the LFS batch endpoint returned an HTTP error.
     """
-    #
-    # Upload instructions are retrieved by chunk of 256 files to avoid reaching
-    # the payload limit.
-    batch_actions: list[dict] = []
-    for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
-        batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
-            upload_infos=[op.upload_info for op in chunk],
-            repo_id=repo_id,
-            repo_type=repo_type,
-            revision=revision,
-            endpoint=endpoint,
-            headers=headers,
-            token=None,  # already passed in 'headers'
-        )
-
-        # If at least 1 error, we do not retrieve information for other chunks
-        if batch_errors_chunk:
-            message = "\n".join(
-                [
-                    f"Encountered error for file with OID {err.get('oid')}: `{err.get('error', {}).get('message')}"
-                    for err in batch_errors_chunk
-                ]
-            )
-            raise ValueError(f"LFS batch endpoint returned errors:\n{message}")
-
-        batch_actions += batch_actions_chunk
-    oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}
-
-    # Step 2: ignore files that have already been uploaded
+    # Filter out files already present upstream
     filtered_actions = []
-    for action in batch_actions:
+    for action in actions:
         if action.get("actions") is None:
             logger.debug(
-                f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
-                " present upstream - skipping upload."
+                f"Content of file {oid2addop[action['oid']].path_in_repo} is already present upstream - skipping upload."
             )
         else:
            filtered_actions.append(action)

-    if len(filtered_actions) == 0:
-        logger.debug("No LFS files to upload.")
-        return
-
-    # Step 3: upload files concurrently according to these instructions
+    # Upload according to server-provided actions
     def _wrapped_lfs_upload(batch_action) -> None:
         try:
             operation = oid2addop[batch_action["oid"]]
@@ -479,7 +539,7 @@ def _upload_xet_files(
     This chunks the files and deduplicates the chunks before uploading them to xetcas storage.

     Args:
-        additions (
+        additions (`` of `CommitOperationAdd`):
             The files to be uploaded.
         repo_type (`str`):
             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
@@ -576,30 +636,30 @@ def _upload_xet_files(
     progress, progress_callback = None, None

     try:
-        for
+        all_bytes_ops = [op for op in additions if isinstance(op.path_or_fileobj, bytes)]
+        all_paths_ops = [op for op in additions if isinstance(op.path_or_fileobj, (str, Path))]
+
+        if len(all_paths_ops) > 0:
+            all_paths = [str(op.path_or_fileobj) for op in all_paths_ops]
+            upload_files(
+                all_paths,
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                progress_callback,
+                repo_type,
+            )
+
+        if len(all_bytes_ops) > 0:
+            all_bytes = [op.path_or_fileobj for op in all_bytes_ops]
+            upload_bytes(
+                all_bytes,
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                progress_callback,
+                repo_type,
+            )

     finally:
         if progress is not None:
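From user code the entry points are unchanged (`HfApi.upload_file` / `upload_folder`); the new `_upload_files` helper only changes how each batch negotiates its transfer. A hedged sketch of the behaviour described in the diff, with a placeholder `repo_id`: a plain file path stays eligible for the Xet transfer, while an open binary handle triggers the warning and the HTTP/LFS fallback.

```python
from huggingface_hub import HfApi

api = HfApi()
repo_id = "user/my-model"  # placeholder repo you can write to

# Path input: offered as "basic", "multipart" and (if the xet backend is
# installed) "xet" in the LFS batch negotiation.
api.upload_file(
    path_or_fileobj="weights.bin",
    path_in_repo="weights.bin",
    repo_id=repo_id,
)

# Buffered binary handle: per the diff, Xet does not support BufferedIOBase
# inputs, so the client logs a warning and falls back to the HTTP upload path.
with open("weights.bin", "rb") as fileobj:
    api.upload_file(path_or_fileobj=fileobj, path_in_repo="weights-copy.bin", repo_id=repo_id)
```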
huggingface_hub/_commit_scheduler.py
CHANGED

@@ -205,13 +205,10 @@ class CommitScheduler:
     """
     Push folder to the Hub and return the commit info.

-    <Tip warning={true}>
-
-    This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
-    queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
-    issues.
-
-    </Tip>
+    > [!WARNING]
+    > This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
+    > queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
+    > issues.

     The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
     uploads only changed files. If no changes are found, the method returns without committing anything. If you want
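The docstring above describes the scheduler's contract (background commits, append-only folder, only changed files uploaded). A minimal usage sketch under those assumptions, with placeholder repo and folder names; `push_to_hub` itself is never called directly:

```python
from pathlib import Path

from huggingface_hub import CommitScheduler

folder = Path("training-logs")
folder.mkdir(exist_ok=True)

# Commits the folder's new/changed files in the background every 5 minutes.
scheduler = CommitScheduler(
    repo_id="user/my-logs",   # placeholder
    repo_type="dataset",
    folder_path=folder,
    every=5,                  # minutes between background commits
)

# Keep appending to files under `folder`; unchanged files are skipped at each cycle.
with (folder / "metrics.txt").open("a") as f:
    f.write("step=1 loss=2.3\n")
```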
huggingface_hub/_login.py
CHANGED

@@ -20,7 +20,7 @@ from pathlib import Path
 from typing import Optional

 from . import constants
-from .commands._cli_utils import ANSI
+from .cli._cli_utils import ANSI
 from .utils import (
     capture_output,
     get_token,
@@ -70,21 +70,15 @@ def login(
     To log in from outside of a script, one can also use `hf auth login` which is
     a cli command that wraps [`login`].

-    <Tip>
+    > [!TIP]
+    > [`login`] is a drop-in replacement method for [`notebook_login`] as it wraps and
+    > extends its capabilities.

-    [`login`] is a drop-in replacement method for [`notebook_login`] as it wraps and
-    extends its capabilities.
-
-    </Tip>
-
-    <Tip>
-
-    When the token is not passed, [`login`] will automatically detect if the script runs
-    in a notebook or not. However, this detection might not be accurate due to the
-    variety of notebooks that exists nowadays. If that is the case, you can always force
-    the UI by using [`notebook_login`] or [`interpreter_login`].
-
-    </Tip>
+    > [!TIP]
+    > When the token is not passed, [`login`] will automatically detect if the script runs
+    > in a notebook or not. However, this detection might not be accurate due to the
+    > variety of notebooks that exists nowadays. If that is the case, you can always force
+    > the UI by using [`notebook_login`] or [`interpreter_login`].

     Args:
         token (`str`, *optional*):
@@ -250,7 +244,7 @@ def interpreter_login(*, skip_if_logged_in: bool = False) -> None:
         logger.info("User is already logged in.")
         return

-    from .commands.delete_cache import _ask_for_confirmation_no_tui
+    from .cli.cache import _ask_for_confirmation_no_tui

     print(_HF_LOGO_ASCII)
     if get_token() is not None:
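For completeness, the login flow those docstrings describe is unchanged; only the admonition markup and the internal import paths moved. A minimal reminder of the programmatic call (the token value is a placeholder):

```python
from huggingface_hub import login

# Non-interactive: pass a token explicitly (placeholder value shown).
login(token="hf_xxx")

# Interactive: with no token, login() prompts and, in notebooks, may show the
# notebook_login() widget as described in the docstring above.
# login()
```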
huggingface_hub/_snapshot_download.py
CHANGED

@@ -1,6 +1,6 @@
 import os
 from pathlib import Path
-from typing import Iterable, Optional, Union
+from typing import Iterable, List, Literal, Optional, Union, overload

 import httpx
 from tqdm.auto import tqdm as base_tqdm
@@ -8,13 +8,14 @@ from tqdm.contrib.concurrent import thread_map

 from . import constants
 from .errors import (
+    DryRunError,
     GatedRepoError,
     HfHubHTTPError,
     LocalEntryNotFoundError,
     RepositoryNotFoundError,
     RevisionNotFoundError,
 )
-from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
+from .file_download import REGEX_COMMIT_HASH, DryRunFileInfo, hf_hub_download, repo_folder_name
 from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo
 from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
 from .utils import tqdm as hf_tqdm
@@ -25,6 +26,81 @@ logger = logging.get_logger(__name__)
 VERY_LARGE_REPO_THRESHOLD = 50000  # After this limit, we don't consider `repo_info.siblings` to be reliable enough


+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[False] = False,
+) -> str: ...
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[True] = True,
+) -> list[DryRunFileInfo]: ...
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: bool = False,
+) -> Union[str, list[DryRunFileInfo]]: ...
+
+
 @validate_hf_hub_args
 def snapshot_download(
     repo_id: str,
@@ -46,7 +122,8 @@ def snapshot_download(
     tqdm_class: Optional[type[base_tqdm]] = None,
     headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-) -> str:
+    dry_run: bool = False,
+) -> Union[str, list[DryRunFileInfo]]:
     """Download repo files.

     Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
@@ -109,9 +186,14 @@ def snapshot_download(
             Note that the `tqdm_class` is not passed to each individual download.
             Defaults to the custom HF progress bar that can be disabled by setting
             `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
+        dry_run (`bool`, *optional*, defaults to `False`):
+            If `True`, perform a dry run without actually downloading the files. Returns a list of
+            [`DryRunFileInfo`] objects containing information about what would be downloaded.

     Returns:
-        `str
+        `str` or list of [`DryRunFileInfo`]:
+            - If `dry_run=False`: Local snapshot path.
+            - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information.

     Raises:
         [`~utils.RepositoryNotFoundError`]
@@ -187,6 +269,11 @@ def snapshot_download(
     # - f the specified revision is a branch or tag, look inside "refs".
     # => if local_dir is not None, we will return the path to the local folder if it exists.
     if repo_info is None:
+        if dry_run:
+            raise DryRunError(
+                "Dry run cannot be performed as the repository cannot be accessed. Please check your internet connection or authentication token."
+            ) from api_call_error
+
         # Try to get which commit hash corresponds to the specified revision
         commit_hash = None
         if REGEX_COMMIT_HASH.match(revision):
@@ -273,6 +360,8 @@ def snapshot_download(
         tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
     else:
         tqdm_desc = "Fetching ... files"
+    if dry_run:
+        tqdm_desc = "[dry-run] " + tqdm_desc

     commit_hash = repo_info.sha
     snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
@@ -288,28 +377,33 @@ def snapshot_download(
     except OSError as e:
         logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.")

+    results: List[Union[str, DryRunFileInfo]] = []
+
     # we pass the commit_hash to hf_hub_download
     # so no network call happens if we already
     # have the file locally.
-    def _inner_hf_hub_download(repo_file: str):
+    def _inner_hf_hub_download(repo_file: str) -> None:
+        results.append(
+            hf_hub_download(  # type: ignore[no-matching-overload] # ty not happy, don't know why :/
+                repo_id,
+                filename=repo_file,
+                repo_type=repo_type,
+                revision=commit_hash,
+                endpoint=endpoint,
+                cache_dir=cache_dir,
+                local_dir=local_dir,
+                library_name=library_name,
+                library_version=library_version,
+                user_agent=user_agent,
+                etag_timeout=etag_timeout,
+                force_download=force_download,
+                token=token,
+                headers=headers,
+                dry_run=dry_run,
+            )
        )

-    if constants.HF_HUB_ENABLE_HF_TRANSFER:
+    if constants.HF_HUB_ENABLE_HF_TRANSFER and not dry_run:
         # when using hf_transfer we don't want extra parallelism
         # from the one hf_transfer provides
         for file in filtered_repo_files:
@@ -324,6 +418,10 @@ def snapshot_download(
             tqdm_class=tqdm_class or hf_tqdm,
         )

+    if dry_run:
+        assert all(isinstance(r, DryRunFileInfo) for r in results)
+        return results  # type: ignore
+
     if local_dir is not None:
         return str(os.path.realpath(local_dir))
     return snapshot_folder
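The new `dry_run` parameter and overloads change the return type of `snapshot_download`. A short sketch of the documented behaviour, using a public model repo as an example; the exact fields of `DryRunFileInfo` are defined in `file_download.py` and are not assumed here:

```python
from huggingface_hub import snapshot_download

# Dry run: nothing is downloaded; a list of DryRunFileInfo objects is returned.
infos = snapshot_download("openai-community/gpt2", dry_run=True)
print(f"{len(infos)} files would be fetched")
for info in infos:
    print(info)

# Default call (dry_run=False) still returns the local snapshot path as a str.
local_path = snapshot_download("openai-community/gpt2")
print(local_path)
```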
huggingface_hub/_tensorboard_logger.py
CHANGED

@@ -52,11 +52,8 @@ class HFSummaryWriter(_RuntimeSummaryWriter):
     issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
     minutes (default to every 5 minutes).

-    <Tip warning={true}>
-
-    `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.
-
-    </Tip>
+    > [!WARNING]
+    > `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.

     Args:
         repo_id (`str`):
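The docstring above is the relevant contract: logs are pushed to the Hub every `commit_every` minutes, and a failed push does not interrupt the main script. A minimal sketch under those assumptions (placeholder `repo_id`; requires the tensorboard writer dependency that backs `HFSummaryWriter`):

```python
from huggingface_hub import HFSummaryWriter

# Logs are written locally and pushed to the Hub every `commit_every` minutes.
writer = HFSummaryWriter(repo_id="user/my-trainings", commit_every=5)

for step in range(100):
    loss = 1.0 / (step + 1)  # dummy metric
    writer.add_scalar("train/loss", loss, global_step=step)
```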
huggingface_hub/_upload_large_folder.py
CHANGED

@@ -31,8 +31,7 @@ from . import constants
 from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes
 from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata
 from .constants import DEFAULT_REVISION, REPO_TYPES
-from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
-from .utils._cache_manager import _format_size
+from .utils import DEFAULT_IGNORE_PATTERNS, _format_size, filter_repo_objects, tqdm
 from .utils._runtime import is_xet_available
 from .utils.sha import sha_fileobj

huggingface_hub/_webhooks_server.py
CHANGED

@@ -53,17 +53,11 @@ class WebhooksServer:
     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
     WebhooksServer and deploy it on a Space.

-    <Tip warning={true}>
+    > [!WARNING]
+    > `WebhooksServer` is experimental. Its API is subject to change in the future.

-    `WebhooksServer` is experimental. Its API is subject to change in the future.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
-
-    </Tip>
+    > [!WARNING]
+    > You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).

     Args:
         ui (`gradio.Blocks`, optional):
@@ -240,17 +234,11 @@ def webhook_endpoint(path: Optional[str] = None) -> Callable:
     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
     server and deploy it on a Space.

-    <Tip warning={true}>
-
-    `webhook_endpoint` is experimental. Its API is subject to change in the future.
-
-    </Tip>
-
-    <Tip warning={true}>
-
-    You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
+    > [!WARNING]
+    > `webhook_endpoint` is experimental. Its API is subject to change in the future.

-    </Tip>
+    > [!WARNING]
+    > You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).

     Args:
         path (`str`, optional):