huggingface-hub 0.31.0rc0__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +145 -46
- huggingface_hub/_commit_api.py +168 -119
- huggingface_hub/_commit_scheduler.py +15 -15
- huggingface_hub/_inference_endpoints.py +15 -12
- huggingface_hub/_jobs_api.py +301 -0
- huggingface_hub/_local_folder.py +18 -3
- huggingface_hub/_login.py +31 -63
- huggingface_hub/_oauth.py +460 -0
- huggingface_hub/_snapshot_download.py +239 -80
- huggingface_hub/_space_api.py +5 -5
- huggingface_hub/_tensorboard_logger.py +15 -19
- huggingface_hub/_upload_large_folder.py +172 -76
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +13 -25
- huggingface_hub/{commands → cli}/__init__.py +1 -15
- huggingface_hub/cli/_cli_utils.py +173 -0
- huggingface_hub/cli/auth.py +147 -0
- huggingface_hub/cli/cache.py +841 -0
- huggingface_hub/cli/download.py +189 -0
- huggingface_hub/cli/hf.py +60 -0
- huggingface_hub/cli/inference_endpoints.py +377 -0
- huggingface_hub/cli/jobs.py +772 -0
- huggingface_hub/cli/lfs.py +175 -0
- huggingface_hub/cli/repo.py +315 -0
- huggingface_hub/cli/repo_files.py +94 -0
- huggingface_hub/{commands/env.py → cli/system.py} +10 -13
- huggingface_hub/cli/upload.py +294 -0
- huggingface_hub/cli/upload_large_folder.py +117 -0
- huggingface_hub/community.py +20 -12
- huggingface_hub/constants.py +38 -53
- huggingface_hub/dataclasses.py +609 -0
- huggingface_hub/errors.py +80 -30
- huggingface_hub/fastai_utils.py +30 -41
- huggingface_hub/file_download.py +435 -351
- huggingface_hub/hf_api.py +2050 -1124
- huggingface_hub/hf_file_system.py +269 -152
- huggingface_hub/hub_mixin.py +43 -63
- huggingface_hub/inference/_client.py +347 -434
- huggingface_hub/inference/_common.py +133 -121
- huggingface_hub/inference/_generated/_async_client.py +397 -541
- huggingface_hub/inference/_generated/types/__init__.py +5 -1
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +59 -23
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/__init__.py +0 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
- huggingface_hub/inference/_mcp/agent.py +100 -0
- huggingface_hub/inference/_mcp/cli.py +247 -0
- huggingface_hub/inference/_mcp/constants.py +81 -0
- huggingface_hub/inference/_mcp/mcp_client.py +395 -0
- huggingface_hub/inference/_mcp/types.py +45 -0
- huggingface_hub/inference/_mcp/utils.py +128 -0
- huggingface_hub/inference/_providers/__init__.py +82 -7
- huggingface_hub/inference/_providers/_common.py +129 -27
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cerebras.py +1 -1
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/inference/_providers/cohere.py +20 -3
- huggingface_hub/inference/_providers/fal_ai.py +183 -56
- huggingface_hub/inference/_providers/featherless_ai.py +38 -0
- huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
- huggingface_hub/inference/_providers/groq.py +9 -0
- huggingface_hub/inference/_providers/hf_inference.py +69 -30
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +33 -5
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +44 -0
- huggingface_hub/inference/_providers/openai.py +3 -1
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +31 -13
- huggingface_hub/inference/_providers/sambanova.py +18 -4
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/together.py +20 -5
- huggingface_hub/inference/_providers/wavespeed.py +138 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +33 -100
- huggingface_hub/repocard.py +34 -38
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +12 -15
- huggingface_hub/serialization/_dduf.py +8 -8
- huggingface_hub/serialization/_torch.py +69 -69
- huggingface_hub/utils/__init__.py +19 -8
- huggingface_hub/utils/_auth.py +7 -7
- huggingface_hub/utils/_cache_manager.py +92 -147
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +55 -0
- huggingface_hub/utils/_experimental.py +7 -5
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +5 -5
- huggingface_hub/utils/_headers.py +8 -30
- huggingface_hub/utils/_http.py +398 -239
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +61 -24
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +4 -4
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +55 -74
- huggingface_hub/utils/_verification.py +167 -0
- huggingface_hub/utils/_xet.py +64 -17
- huggingface_hub/utils/_xet_progress_reporting.py +162 -0
- huggingface_hub/utils/insecure_hashlib.py +3 -5
- huggingface_hub/utils/logging.py +8 -11
- huggingface_hub/utils/tqdm.py +5 -4
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -85
- huggingface_hub-1.1.3.dist-info/RECORD +155 -0
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
- huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
- huggingface_hub/commands/delete_cache.py +0 -474
- huggingface_hub/commands/download.py +0 -200
- huggingface_hub/commands/huggingface_cli.py +0 -61
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo_files.py +0 -128
- huggingface_hub/commands/scan_cache.py +0 -181
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -314
- huggingface_hub/commands/upload_large_folder.py +0 -129
- huggingface_hub/commands/user.py +0 -304
- huggingface_hub/commands/version.py +0 -37
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.31.0rc0.dist-info/RECORD +0 -135
- huggingface_hub-0.31.0rc0.dist-info/entry_points.txt +0 -6
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
- {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
huggingface_hub/lfs.py
CHANGED
````diff
@@ -14,15 +14,12 @@
 # limitations under the License.
 """Git LFS related type definitions and utilities"""
 
-import inspect
 import io
 import re
-import warnings
 from dataclasses import dataclass
 from math import ceil
 from os.path import getsize
-from pathlib import Path
-from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Optional, Tuple, TypedDict
+from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, TypedDict
 from urllib.parse import unquote
 
 from huggingface_hub import constants
@@ -34,12 +31,10 @@ from .utils import (
     hf_raise_for_status,
     http_backoff,
     logging,
-    tqdm,
     validate_hf_hub_args,
 )
 from .utils._lfs import SliceFileObj
 from .utils.sha import sha256, sha_fileobj
-from .utils.tqdm import is_tqdm_disabled
 
 
 if TYPE_CHECKING:
@@ -107,8 +102,9 @@ def post_lfs_batch_info(
     repo_id: str,
     revision: Optional[str] = None,
     endpoint: Optional[str] = None,
-    headers: Optional[Dict[str, str]] = None,
-) -> Tuple[List[dict], List[dict]]:
+    headers: Optional[dict[str, str]] = None,
+    transfers: Optional[list[str]] = None,
+) -> tuple[list[dict], list[dict], Optional[str]]:
     """
     Requests the LFS batch endpoint to retrieve upload instructions
 
@@ -127,16 +123,19 @@ def post_lfs_batch_info(
             The git revision to upload to.
         headers (`dict`, *optional*):
             Additional headers to include in the request
+        transfers (`list`, *optional*):
+            List of transfer methods to use. Defaults to ["basic", "multipart"].
 
     Returns:
-        `LfsBatchInfo`: 2-tuple:
+        `LfsBatchInfo`: 3-tuple:
             - First element is the list of upload instructions from the server
-            - Second element is the list of errors, if any
+            - Second element is a list of errors, if any
+            - Third element is the chosen transfer adapter if provided by the server (e.g. "basic", "multipart", "xet")
 
     Raises:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If an argument is invalid or the server response is malformed.
-        [`HfHubHTTPError`](https://huggingface.co/docs/huggingface_hub/package_reference/utilities#huggingface_hub.utils.HfHubHTTPError)
+        [`HfHubHTTPError`]
            If the server returned an error.
     """
     endpoint = endpoint if endpoint is not None else constants.ENDPOINT
@@ -144,9 +143,9 @@ def post_lfs_batch_info(
     if repo_type in constants.REPO_TYPES_URL_PREFIXES:
         url_prefix = constants.REPO_TYPES_URL_PREFIXES[repo_type]
     batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch"
-    payload: Dict = {
+    payload: dict = {
         "operation": "upload",
-        "transfers": ["basic", "multipart"],
+        "transfers": transfers if transfers is not None else ["basic", "multipart"],
         "objects": [
             {
                 "oid": upload.sha256.hex(),
@@ -172,9 +171,13 @@ def post_lfs_batch_info(
     if not isinstance(objects, list):
         raise ValueError("Malformed response from server")
 
+    chosen_transfer = batch_info.get("transfer")
+    chosen_transfer = chosen_transfer if isinstance(chosen_transfer, str) else None
+
     return (
         [_validate_batch_actions(obj) for obj in objects if "error" not in obj],
         [_validate_batch_error(obj) for obj in objects if "error" in obj],
+        chosen_transfer,
     )
 
 
````
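The hunks above extend `post_lfs_batch_info` to accept a `transfers` list and to return the server-chosen transfer adapter as a third tuple element. A minimal sketch of the new call contract — `upload_infos`, `token`, and `repo_type` are assumed parameter names (they sit outside the changed lines), and `UploadInfo.from_path` is assumed to behave as in previous releases:

```python
from huggingface_hub.lfs import UploadInfo, post_lfs_batch_info

# Assumed helper from previous releases: computes sha256/size metadata for a file.
info = UploadInfo.from_path("weights.bin")

actions, errors, chosen_transfer = post_lfs_batch_info(
    upload_infos=[info],   # assumed parameter name (outside this diff)
    token=None,            # assumed parameter name (outside this diff)
    repo_type="model",
    repo_id="user/repo",
    transfers=["basic", "multipart", "xet"],  # new optional argument
)
# The third tuple element is new: the transfer adapter picked by the server, if any.
if chosen_transfer == "xet":
    ...  # route the upload through a xet-specific path
```

Passing `transfers=None` keeps the previous behavior (`["basic", "multipart"]`), so existing callers only need to unpack the extra tuple element.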
````diff
@@ -187,14 +190,14 @@ class CompletionPayloadT(TypedDict):
     """Payload that will be sent to the Hub when uploading multi-part."""
 
     oid: str
-    parts: List[PayloadPartT]
+    parts: list[PayloadPartT]
 
 
 def lfs_upload(
     operation: "CommitOperationAdd",
-    lfs_batch_action: Dict,
+    lfs_batch_action: dict,
     token: Optional[str] = None,
-    headers: Optional[Dict[str, str]] = None,
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
 ) -> None:
     """
@@ -214,7 +217,7 @@ def lfs_upload(
     Raises:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If `lfs_batch_action` is improperly formatted
-        [`HfHubHTTPError`](https://huggingface.co/docs/huggingface_hub/package_reference/utilities#huggingface_hub.utils.HfHubHTTPError)
+        [`HfHubHTTPError`]
             If the upload resulted in an error
     """
     # 0. If LFS file is already present, skip upload
@@ -308,42 +311,26 @@ def _upload_single_part(operation: "CommitOperationAdd", upload_url: str) -> None:
         fileobj:
             The file-like object holding the data to upload.
 
-    Returns: `requests.Response`
-
     Raises:
-        [`HfHubHTTPError`](https://huggingface.co/docs/huggingface_hub/package_reference/utilities#huggingface_hub.utils.HfHubHTTPError)
-            If the upload resulted in an error
+        [`HfHubHTTPError`]
+            If the upload resulted in an error.
     """
     with operation.as_file(with_tqdm=True) as fileobj:
         # S3 might raise a transient 500 error -> let's retry if that happens
-        response = http_backoff("PUT", upload_url, data=fileobj, retry_on_status_codes=(500, 502, 503, 504))
+        response = http_backoff("PUT", upload_url, data=fileobj)
        hf_raise_for_status(response)
 
 
-def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size: int, upload_url: str) -> None:
+def _upload_multi_part(operation: "CommitOperationAdd", header: dict, chunk_size: int, upload_url: str) -> None:
     """
     Uploads file using HF multipart LFS transfer protocol.
     """
     # 1. Get upload URLs for each part
     sorted_parts_urls = _get_sorted_parts_urls(header=header, upload_info=operation.upload_info, chunk_size=chunk_size)
 
-    # 2. Upload parts (either with hf_transfer or in pure Python)
-    use_hf_transfer = constants.HF_HUB_ENABLE_HF_TRANSFER
-    if (
-        constants.HF_HUB_ENABLE_HF_TRANSFER
-        and not isinstance(operation.path_or_fileobj, str)
-        and not isinstance(operation.path_or_fileobj, Path)
-    ):
-        warnings.warn(
-            "hf_transfer is enabled but does not support uploading from bytes or BinaryIO, falling back to regular"
-            " upload"
-        )
-        use_hf_transfer = False
-
-    response_headers = (
-        _upload_parts_hf_transfer(operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size)
-        if use_hf_transfer
-        else _upload_parts_iteratively(operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size)
+    # 2. Upload parts (pure Python)
+    response_headers = _upload_parts_iteratively(
+        operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size
     )
 
     # 3. Send completion request
@@ -355,7 +342,7 @@ def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size: int, upload_url: str) -> None:
     hf_raise_for_status(completion_res)
 
 
-def _get_sorted_parts_urls(header: Dict, upload_info: UploadInfo, chunk_size: int) -> List[str]:
+def _get_sorted_parts_urls(header: dict, upload_info: UploadInfo, chunk_size: int) -> list[str]:
     sorted_part_upload_urls = [
         upload_url
         for _, upload_url in sorted(
@@ -373,8 +360,8 @@ def _get_sorted_parts_urls(header: Dict, upload_info: UploadInfo, chunk_size: int) -> List[str]:
     return sorted_part_upload_urls
 
 
-def _get_completion_payload(response_headers: List[Dict], oid: str) -> CompletionPayloadT:
-    parts: List[PayloadPartT] = []
+def _get_completion_payload(response_headers: list[dict], oid: str) -> CompletionPayloadT:
+    parts: list[PayloadPartT] = []
     for part_number, header in enumerate(response_headers):
         etag = header.get("etag")
         if etag is None or etag == "":
@@ -389,8 +376,8 @@ def _get_completion_payload(response_headers: List[Dict], oid: str) -> CompletionPayloadT:
 
 
 def _upload_parts_iteratively(
-    operation: "CommitOperationAdd", sorted_parts_urls: List[str], chunk_size: int
-) -> List[Dict]:
+    operation: "CommitOperationAdd", sorted_parts_urls: list[str], chunk_size: int
+) -> list[dict]:
     headers = []
     with operation.as_file(with_tqdm=True) as fileobj:
         for part_idx, part_upload_url in enumerate(sorted_parts_urls):
@@ -400,61 +387,7 @@ def _upload_parts_iteratively(
                 read_limit=chunk_size,
             ) as fileobj_slice:
                 # S3 might raise a transient 500 error -> let's retry if that happens
-                part_upload_res = http_backoff(
-                    "PUT", part_upload_url, data=fileobj_slice, retry_on_status_codes=(500, 502, 503, 504)
-                )
+                part_upload_res = http_backoff("PUT", part_upload_url, data=fileobj_slice)
                 hf_raise_for_status(part_upload_res)
                 headers.append(part_upload_res.headers)
     return headers  # type: ignore
-
-
-def _upload_parts_hf_transfer(
-    operation: "CommitOperationAdd", sorted_parts_urls: List[str], chunk_size: int
-) -> List[Dict]:
-    # Upload file using an external Rust-based package. Upload is faster but support less features (no progress bars).
-    try:
-        from hf_transfer import multipart_upload
-    except ImportError:
-        raise ValueError(
-            "Fast uploading using 'hf_transfer' is enabled (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is"
-            " not available in your environment. Try `pip install hf_transfer`."
-        )
-
-    supports_callback = "callback" in inspect.signature(multipart_upload).parameters
-    if not supports_callback:
-        warnings.warn(
-            "You are using an outdated version of `hf_transfer`. Consider upgrading to latest version to enable progress bars using `pip install -U hf_transfer`."
-        )
-
-    total = operation.upload_info.size
-    desc = operation.path_in_repo
-    if len(desc) > 40:
-        desc = f"(…){desc[-40:]}"
-
-    with tqdm(
-        unit="B",
-        unit_scale=True,
-        total=total,
-        initial=0,
-        desc=desc,
-        disable=is_tqdm_disabled(logger.getEffectiveLevel()),
-        name="huggingface_hub.lfs_upload",
-    ) as progress:
-        try:
-            output = multipart_upload(
-                file_path=operation.path_or_fileobj,
-                parts_urls=sorted_parts_urls,
-                chunk_size=chunk_size,
-                max_files=128,
-                parallel_failures=127,  # could be removed
-                max_retries=5,
-                **({"callback": progress.update} if supports_callback else {}),
-            )
-        except Exception as e:
-            raise RuntimeError(
-                "An error occurred while uploading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for"
-                " better error handling."
-            ) from e
-        if not supports_callback:
-            progress.update(total)
-        return output
````
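The second half of the diff removes the `hf_transfer` fast path entirely: `_upload_multi_part` now always goes through `_upload_parts_iteratively`, and `retry_on_status_codes=(500, 502, 503, 504)` disappears from the `http_backoff` calls (presumably because 5xx retries became a default of `http_backoff` itself; that change is not visible in this file). The public upload API is unchanged for callers — a sketch using the stable `HfApi` surface:

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="weights.bin",
    path_in_repo="weights.bin",
    repo_id="user/repo",  # illustrative repo id
)
# Per the removed code above, HF_HUB_ENABLE_HF_TRANSFER no longer routes
# multipart LFS uploads through hf_transfer.multipart_upload in this version.
```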
huggingface_hub/repocard.py
CHANGED
````diff
@@ -1,9 +1,8 @@
 import os
 import re
 from pathlib import Path
-from typing import Any, Dict, Literal, Optional, Type, Union
+from typing import Any, Literal, Optional, Union
 
-import requests
 import yaml
 
 from huggingface_hub.file_download import hf_hub_download
@@ -17,7 +16,7 @@ from huggingface_hub.repocard_data import (
     eval_results_to_model_index,
     model_index_to_eval_results,
 )
-from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
+from huggingface_hub.utils import HfHubHTTPError, get_session, hf_raise_for_status, is_jinja_available, yaml_dump
 
 from . import constants
 from .errors import EntryNotFoundError
@@ -65,13 +64,11 @@ class RepoCard:
         '\\n# My repo\\n'
 
         ```
-
-        <Tip>
-        Raises the following error:
-
-            - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
-              when the content of the repo card metadata is not a dictionary.
-
-        </Tip>
+        > [!TIP]
+        > Raises the following error:
+        >
+        > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+        >   when the content of the repo card metadata is not a dictionary.
         """
 
         # Set the content of the RepoCard, as well as underlying .data and .text attributes.
@@ -149,7 +146,7 @@ class RepoCard:
             repo_id_or_path (`Union[str, Path]`):
                 The repo ID associated with a Hugging Face Hub repo or a local filepath.
             repo_type (`str`, *optional*):
-                The type of Hugging Face repo to push to. Defaults to None, which will use the "model". Other options
+                The type of Hugging Face repo to push to. Defaults to None, which will use "model". Other options
                 are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
                 class, the default value will be the child class's `repo_type`.
             token (`str`, *optional*):
@@ -199,15 +196,13 @@ class RepoCard:
                 The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
                 If this function is called from a child class, the default will be the child class's `repo_type`.
 
-        <Tip>
-        Raises the following errors:
-
-            - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
-              if the card fails validation checks.
-            - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
-              if the request to the Hub API fails for any other reason.
-
-        </Tip>
+        > [!TIP]
+        > Raises the following errors:
+        >
+        > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+        >   if the card fails validation checks.
+        > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        >   if the request to the Hub API fails for any other reason.
         """
 
         # If repo type is provided, otherwise, use the repo type of the card.
@@ -220,11 +215,11 @@ class RepoCard:
         headers = {"Accept": "text/plain"}
 
         try:
-            r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers)
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as exc:
-            if r.status_code == 400:
-                raise ValueError(r.text)
+            response = get_session().post("https://huggingface.co/api/validate-yaml", json=body, headers=headers)
+            hf_raise_for_status(response)
+        except HfHubHTTPError as exc:
+            if response.status_code == 400:
+                raise ValueError(response.text)
             else:
                 raise exc
 
````
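With `requests` no longer imported, card validation in the hunk above posts the payload as `json=body` and surfaces failures through `hf_raise_for_status`/`HfHubHTTPError` instead of `requests.exceptions.HTTPError`. A sketch of what callers observe — the `validate-yaml` endpoint and the 400-to-`ValueError` translation come straight from the changed lines:

```python
from huggingface_hub import RepoCard
from huggingface_hub.utils import HfHubHTTPError

card = RepoCard("---\nlicense: mit\n---\n# My repo\n")
try:
    card.validate(repo_type="model")  # POSTs to https://huggingface.co/api/validate-yaml
except ValueError as err:
    print("card metadata failed validation:", err)  # HTTP 400 -> ValueError
except HfHubHTTPError as err:
    print("Hub API error:", err)  # any other HTTP failure
```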
````diff
@@ -263,7 +258,7 @@ class RepoCard:
             If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
             If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
             Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
-            especially useful if the repo is updated / committed concurrently.
+            especially useful if the repo is updated / committed too concurrently.
         Returns:
             `str`: URL of the commit which updated the card metadata.
         """
@@ -276,7 +271,7 @@ class RepoCard:
 
         with SoftTemporaryDirectory() as tmpdir:
             tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
-            tmp_path.write_text(str(self))
+            tmp_path.write_text(str(self), encoding="utf-8")
             url = upload_file(
                 path_or_fileobj=str(tmp_path),
                 path_in_repo=constants.REPOCARD_NAME,
@@ -336,7 +331,7 @@ class RepoCard:
 
 
 class ModelCard(RepoCard):
-    card_data_class = ModelCardData
+    card_data_class = ModelCardData  # type: ignore[assignment]
     default_template_path = TEMPLATE_MODELCARD_PATH
     repo_type = "model"
 
@@ -417,7 +412,7 @@ class ModelCard(RepoCard):
 
 
 class DatasetCard(RepoCard):
-    card_data_class = DatasetCardData
+    card_data_class = DatasetCardData  # type: ignore[assignment]
     default_template_path = TEMPLATE_DATASETCARD_PATH
     repo_type = "dataset"
 
@@ -482,7 +477,7 @@ class DatasetCard(RepoCard):
 
 
 class SpaceCard(RepoCard):
-    card_data_class = SpaceCardData
+    card_data_class = SpaceCardData  # type: ignore[assignment]
     default_template_path = TEMPLATE_MODELCARD_PATH
     repo_type = "space"
 
@@ -508,7 +503,7 @@ def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]:  # noqa: F722
     return "\n"
 
 
-def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
+def metadata_load(local_path: Union[str, Path]) -> Optional[dict]:
     content = Path(local_path).read_text()
     match = REGEX_YAML_BLOCK.search(content)
     if match:
@@ -521,7 +516,7 @@ def metadata_load(local_path: Union[str, Path]) -> Optional[dict]:
     return None
 
 
-def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
+def metadata_save(local_path: Union[str, Path], data: dict) -> None:
     """
     Save the metadata dict in the upper YAML part Trying to preserve newlines as
     in the existing file. Docs about open() with newline="" parameter:
@@ -569,7 +564,7 @@ def metadata_eval_result(
     dataset_split: Optional[str] = None,
     dataset_revision: Optional[str] = None,
     metrics_verification_token: Optional[str] = None,
-) -> Dict:
+) -> dict:
     """
     Creates a metadata dict with the result from a model evaluated on a dataset.
 
@@ -684,7 +679,7 @@ def metadata_eval_result(
 @validate_hf_hub_args
 def metadata_update(
     repo_id: str,
-    metadata: Dict,
+    metadata: dict,
     *,
     repo_type: Optional[str] = None,
     overwrite: bool = False,
@@ -697,7 +692,7 @@ def metadata_update(
 ) -> str:
     """
     Updates the metadata in the README.md of a repository on the Hugging Face Hub.
-    If the README.md file doesn't exist yet, a new one is created with metadata and 
+    If the README.md file doesn't exist yet, a new one is created with metadata and
     the default ModelCard or DatasetCard template. For `space` repo, an error is thrown
     as a Space cannot exist without a `README.md` file.
 
@@ -730,7 +725,7 @@ def metadata_update(
             If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
             If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
             Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
-            especially useful if the repo is updated / committed concurrently.
+            especially useful if the repo is updated / committed too concurrently.
         Returns:
             `str`: URL of the commit which updated the card metadata.
 
@@ -752,7 +747,7 @@ def metadata_update(
     commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"
 
     # Card class given repo_type
-    card_class: Type[RepoCard]
+    card_class: type[RepoCard]
     if repo_type is None or repo_type == "model":
         card_class = ModelCard
     elif repo_type == "dataset":
@@ -771,7 +766,8 @@ def metadata_update(
         raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")
 
     # Initialize a ModelCard or DatasetCard from default template and no data.
-    card = card_class.from_template(CardData())
+    # Cast to the concrete expected card type to satisfy type checkers.
+    card = card_class.from_template(CardData())  # type: ignore[return-value]
 
     for key, value in metadata.items():
         if key == "model-index":
````
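The remaining hunks are mostly typing modernization (`Dict`/`List`/`Type` → builtin `dict`/`list`/`type`) plus small behavior fixes such as writing the card with `encoding="utf-8"`. Public signatures keep their shape, so an existing call like the sketch below should be unaffected (the repo id is illustrative):

```python
from huggingface_hub import metadata_update

# `metadata` is now annotated as a plain `dict`; the call contract is unchanged.
commit_url = metadata_update(
    repo_id="user/repo",          # illustrative repo id
    metadata={"license": "mit"},
    overwrite=True,               # needed to replace keys that already exist
)
print(commit_url)  # URL of the commit that updated the README.md metadata
```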
|