huggingface-hub 0.35.0rc0-py3-none-any.whl → 1.0.0rc0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +46 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +176 -20
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +10 -14
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +13 -14
- huggingface_hub/_upload_large_folder.py +120 -13
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/_cli_utils.py +2 -2
- huggingface_hub/cli/auth.py +8 -6
- huggingface_hub/cli/cache.py +18 -20
- huggingface_hub/cli/download.py +4 -4
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +599 -22
- huggingface_hub/cli/lfs.py +4 -4
- huggingface_hub/cli/repo.py +11 -7
- huggingface_hub/cli/repo_files.py +2 -2
- huggingface_hub/cli/upload.py +4 -4
- huggingface_hub/cli/upload_large_folder.py +3 -3
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +13 -13
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -19
- huggingface_hub/errors.py +42 -29
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +162 -259
- huggingface_hub/hf_api.py +841 -616
- huggingface_hub/hf_file_system.py +98 -62
- huggingface_hub/hub_mixin.py +37 -57
- huggingface_hub/inference/_client.py +257 -325
- huggingface_hub/inference/_common.py +110 -124
- huggingface_hub/inference/_generated/_async_client.py +307 -432
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +2 -3
- huggingface_hub/inference/_mcp/mcp_client.py +58 -30
- huggingface_hub/inference/_mcp/types.py +10 -7
- huggingface_hub/inference/_mcp/utils.py +11 -7
- huggingface_hub/inference/_providers/__init__.py +4 -2
- huggingface_hub/inference/_providers/_common.py +49 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +52 -21
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +28 -20
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +20 -31
- huggingface_hub/repocard.py +18 -18
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +25 -21
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +4 -4
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +366 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +16 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +32 -11
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
- huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/lfs.py
CHANGED
@@ -14,7 +14,6 @@
 # limitations under the License.
 """Git LFS related type definitions and utilities"""
 
-import inspect
 import io
 import re
 import warnings
@@ -22,7 +21,7 @@ from dataclasses import dataclass
 from math import ceil
 from os.path import getsize
 from pathlib import Path
-from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Optional, Tuple, TypedDict
+from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, TypedDict
 from urllib.parse import unquote
 
 from huggingface_hub import constants
@@ -107,8 +106,8 @@ def post_lfs_batch_info(
     repo_id: str,
     revision: Optional[str] = None,
     endpoint: Optional[str] = None,
-    headers: Optional[Dict[str, str]] = None,
-) -> Tuple[List[dict], List[dict]]:
+    headers: Optional[dict[str, str]] = None,
+) -> tuple[list[dict], list[dict]]:
     """
     Requests the LFS batch endpoint to retrieve upload instructions
 
@@ -136,7 +135,7 @@ def post_lfs_batch_info(
     Raises:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If an argument is invalid or the server response is malformed.
-        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        [`HfHubHTTPError`]
            If the server returned an error.
    """
    endpoint = endpoint if endpoint is not None else constants.ENDPOINT
@@ -144,7 +143,7 @@ def post_lfs_batch_info(
    if repo_type in constants.REPO_TYPES_URL_PREFIXES:
        url_prefix = constants.REPO_TYPES_URL_PREFIXES[repo_type]
    batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch"
-    payload: Dict = {
+    payload: dict = {
        "operation": "upload",
        "transfers": ["basic", "multipart"],
        "objects": [
@@ -187,14 +186,14 @@ class CompletionPayloadT(TypedDict):
    """Payload that will be sent to the Hub when uploading multi-part."""
 
    oid: str
-    parts: List[PayloadPartT]
+    parts: list[PayloadPartT]
 
 
 def lfs_upload(
    operation: "CommitOperationAdd",
-    lfs_batch_action: Dict,
+    lfs_batch_action: dict,
    token: Optional[str] = None,
-    headers: Optional[Dict[str, str]] = None,
+    headers: Optional[dict[str, str]] = None,
    endpoint: Optional[str] = None,
 ) -> None:
    """
@@ -214,7 +213,7 @@ def lfs_upload(
    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
           If `lfs_batch_action` is improperly formatted
-        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        [`HfHubHTTPError`]
           If the upload resulted in an error
    """
    # 0. If LFS file is already present, skip upload
@@ -308,11 +307,9 @@ def _upload_single_part(operation: "CommitOperationAdd", upload_url: str) -> None:
        fileobj:
            The file-like object holding the data to upload.
 
-    Returns: `requests.Response`
-
    Raises:
-        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
-            If the upload resulted in an error
+        [`HfHubHTTPError`]
+            If the upload resulted in an error.
    """
    with operation.as_file(with_tqdm=True) as fileobj:
        # S3 might raise a transient 500 error -> let's retry if that happens
@@ -320,7 +317,7 @@ def _upload_single_part(operation: "CommitOperationAdd", upload_url: str) -> None:
        hf_raise_for_status(response)
 
 
-def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size: int, upload_url: str) -> None:
+def _upload_multi_part(operation: "CommitOperationAdd", header: dict, chunk_size: int, upload_url: str) -> None:
    """
    Uploads file using HF multipart LFS transfer protocol.
    """
@@ -355,7 +352,7 @@ def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size: int, upload_url: str) -> None:
    hf_raise_for_status(completion_res)
 
 
-def _get_sorted_parts_urls(header: Dict, upload_info: UploadInfo, chunk_size: int) -> List[str]:
+def _get_sorted_parts_urls(header: dict, upload_info: UploadInfo, chunk_size: int) -> list[str]:
    sorted_part_upload_urls = [
        upload_url
        for _, upload_url in sorted(
@@ -373,8 +370,8 @@ def _get_sorted_parts_urls(header: Dict, upload_info: UploadInfo, chunk_size: int) -> List[str]:
    return sorted_part_upload_urls
 
 
-def _get_completion_payload(response_headers: List[Dict], oid: str) -> CompletionPayloadT:
-    parts: List[PayloadPartT] = []
+def _get_completion_payload(response_headers: list[dict], oid: str) -> CompletionPayloadT:
+    parts: list[PayloadPartT] = []
    for part_number, header in enumerate(response_headers):
        etag = header.get("etag")
        if etag is None or etag == "":
@@ -389,8 +386,8 @@ def _get_completion_payload(response_headers: List[Dict], oid: str) -> CompletionPayloadT:
 
 
 def _upload_parts_iteratively(
-    operation: "CommitOperationAdd", sorted_parts_urls: List[str], chunk_size: int
-) -> List[Dict]:
+    operation: "CommitOperationAdd", sorted_parts_urls: list[str], chunk_size: int
+) -> list[dict]:
    headers = []
    with operation.as_file(with_tqdm=True) as fileobj:
        for part_idx, part_upload_url in enumerate(sorted_parts_urls):
@@ -409,8 +406,8 @@ def _upload_parts_iteratively(
 
 
 def _upload_parts_hf_transfer(
-    operation: "CommitOperationAdd", sorted_parts_urls: List[str], chunk_size: int
-) -> List[Dict]:
+    operation: "CommitOperationAdd", sorted_parts_urls: list[str], chunk_size: int
+) -> list[dict]:
    # Upload file using an external Rust-based package. Upload is faster but support less features (no progress bars).
    try:
        from hf_transfer import multipart_upload
@@ -420,12 +417,6 @@ def _upload_parts_hf_transfer(
            " not available in your environment. Try `pip install hf_transfer`."
        )
 
-    supports_callback = "callback" in inspect.signature(multipart_upload).parameters
-    if not supports_callback:
-        warnings.warn(
-            "You are using an outdated version of `hf_transfer`. Consider upgrading to latest version to enable progress bars using `pip install -U hf_transfer`."
-        )
-
    total = operation.upload_info.size
    desc = operation.path_in_repo
    if len(desc) > 40:
@@ -448,13 +439,11 @@ def _upload_parts_hf_transfer(
                max_files=128,
                parallel_failures=127,  # could be removed
                max_retries=5,
-                **({"callback": progress.update} if supports_callback else {}),
+                callback=progress.update,
            )
    except Exception as e:
        raise RuntimeError(
            "An error occurred while uploading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for"
            " better error handling."
        ) from e
-    if not supports_callback:
-        progress.update(total)
    return output
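The docstring updates above swap requests' `HTTPError` for the library's own `HfHubHTTPError` in the documented `Raises:` sections, and `_upload_parts_hf_transfer` now always passes `callback=progress.update`, so an `hf_transfer` build that accepts that argument is assumed. For callers of the public upload API the error contract looks roughly like the sketch below (repo id and file names are placeholders, not taken from the diff):

    from huggingface_hub import upload_file
    from huggingface_hub.errors import HfHubHTTPError

    try:
        upload_file(
            path_or_fileobj="model.safetensors",  # placeholder local file
            path_in_repo="model.safetensors",
            repo_id="username/my-model",          # placeholder repo id
        )
    except HfHubHTTPError as err:
        # Server-side failures (LFS batch endpoint, part uploads) now surface as
        # HfHubHTTPError rather than requests.HTTPError.
        print(f"Upload failed: {err}")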
huggingface_hub/repocard.py
CHANGED
@@ -1,9 +1,8 @@
 import os
 import re
 from pathlib import Path
-from typing import Any, Dict, Literal, Optional, Type, Union
+from typing import Any, Literal, Optional, Union
 
-import requests
 import yaml
 
 from huggingface_hub.file_download import hf_hub_download
@@ -17,7 +16,7 @@ from huggingface_hub.repocard_data import (
    eval_results_to_model_index,
    model_index_to_eval_results,
 )
-from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
+from huggingface_hub.utils import HfHubHTTPError, get_session, hf_raise_for_status, is_jinja_available, yaml_dump
 
 from . import constants
 from .errors import EntryNotFoundError
@@ -204,7 +203,7 @@ class RepoCard:
 
            - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
              if the card fails validation checks.
-            - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+            - [`HfHubHTTPError`]
              if the request to the Hub API fails for any other reason.
 
        </Tip>
@@ -220,11 +219,11 @@ class RepoCard:
        headers = {"Accept": "text/plain"}
 
        try:
-            r = get_session().post("https://huggingface.co/api/validate-yaml", body, headers=headers)
-            r.raise_for_status()
-        except requests.exceptions.HTTPError as exc:
-            if r.status_code == 400:
-                raise ValueError(r.text)
+            response = get_session().post("https://huggingface.co/api/validate-yaml", json=body, headers=headers)
+            hf_raise_for_status(response)
+        except HfHubHTTPError as exc:
+            if response.status_code == 400:
+                raise ValueError(response.text)
            else:
                raise exc
 
@@ -336,7 +335,7 @@ class RepoCard:
 
 
 class ModelCard(RepoCard):
-    card_data_class = ModelCardData
+    card_data_class = ModelCardData  # type: ignore[assignment]
    default_template_path = TEMPLATE_MODELCARD_PATH
    repo_type = "model"
 
@@ -417,7 +416,7 @@ class ModelCard(RepoCard):
 
 
 class DatasetCard(RepoCard):
-    card_data_class = DatasetCardData
+    card_data_class = DatasetCardData  # type: ignore[assignment]
    default_template_path = TEMPLATE_DATASETCARD_PATH
    repo_type = "dataset"
 
@@ -482,7 +481,7 @@ class DatasetCard(RepoCard):
 
 
 class SpaceCard(RepoCard):
-    card_data_class = SpaceCardData
+    card_data_class = SpaceCardData  # type: ignore[assignment]
    default_template_path = TEMPLATE_MODELCARD_PATH
    repo_type = "space"
 
@@ -508,7 +507,7 @@ def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]:  # noqa: F722
    return "\n"
 
 
-def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
+def metadata_load(local_path: Union[str, Path]) -> Optional[dict]:
    content = Path(local_path).read_text()
    match = REGEX_YAML_BLOCK.search(content)
    if match:
@@ -521,7 +520,7 @@ def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
    return None
 
 
-def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
+def metadata_save(local_path: Union[str, Path], data: dict) -> None:
    """
    Save the metadata dict in the upper YAML part Trying to preserve newlines as
    in the existing file. Docs about open() with newline="" parameter:
@@ -569,7 +568,7 @@ def metadata_eval_result(
    dataset_split: Optional[str] = None,
    dataset_revision: Optional[str] = None,
    metrics_verification_token: Optional[str] = None,
-) -> Dict:
+) -> dict:
    """
    Creates a metadata dict with the result from a model evaluated on a dataset.
 
@@ -684,7 +683,7 @@ def metadata_eval_result(
 @validate_hf_hub_args
 def metadata_update(
    repo_id: str,
-    metadata: Dict,
+    metadata: dict,
    *,
    repo_type: Optional[str] = None,
    overwrite: bool = False,
@@ -752,7 +751,7 @@ def metadata_update(
    commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"
 
    # Card class given repo_type
-    card_class: Type[RepoCard]
+    card_class: type[RepoCard]
    if repo_type is None or repo_type == "model":
        card_class = ModelCard
    elif repo_type == "dataset":
@@ -771,7 +770,8 @@ def metadata_update(
            raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")
 
        # Initialize a ModelCard or DatasetCard from default template and no data.
-        card = card_class.from_template(CardData())
+        # Cast to the concrete expected card type to satisfy type checkers.
+        card = card_class.from_template(CardData())  # type: ignore[return-value]
 
    for key, value in metadata.items():
        if key == "model-index":
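`RepoCard.validate()` now routes the validate-yaml request through `get_session()` and `hf_raise_for_status()` and catches `HfHubHTTPError` instead of `requests.exceptions.HTTPError`. The caller-facing contract keeps its shape: a 400 from the validator still raises `ValueError`, anything else propagates as `HfHubHTTPError`. A small sketch with an illustrative card body (not taken from the diff):

    from huggingface_hub import ModelCard
    from huggingface_hub.errors import HfHubHTTPError

    card = ModelCard("---\nlicense: mit\nlibrary_name: transformers\n---\n\n# Demo card\n")
    try:
        card.validate(repo_type="model")  # POSTs the YAML block to the Hub validator
    except ValueError as err:
        # HTTP 400: the card metadata itself is invalid.
        print(f"Invalid card metadata: {err}")
    except HfHubHTTPError as err:
        # Any other Hub or network failure.
        print(f"Validation request failed: {err}")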
huggingface_hub/repocard_data.py
CHANGED
@@ -1,7 +1,7 @@
 import copy
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 from huggingface_hub.utils import logging, yaml_dump
 
@@ -38,7 +38,7 @@ class EvalResult:
        dataset_revision (`str`, *optional*):
            The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
            Example: 5503434ddd753f426f4b38109466949a1217c2bb
-        dataset_args (`Dict[str, Any]`, *optional*):
+        dataset_args (`dict[str, Any]`, *optional*):
            The arguments passed during `Metric.compute()`. Example for `bleu`: `{"max_order": 4}`
        metric_name (`str`, *optional*):
            A pretty name for the metric. Example: "Test WER".
@@ -46,7 +46,7 @@ class EvalResult:
            The name of the metric configuration used in `load_metric()`.
            Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
            See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
-        metric_args (`Dict[str, Any]`, *optional*):
+        metric_args (`dict[str, Any]`, *optional*):
            The arguments passed during `Metric.compute()`. Example for `bleu`: max_order: 4
        verified (`bool`, *optional*):
            Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
@@ -102,7 +102,7 @@ class EvalResult:
 
    # The arguments passed during `Metric.compute()`.
    # Example for `bleu`: max_order: 4
-    dataset_args: Optional[Dict[str, Any]] = None
+    dataset_args: Optional[dict[str, Any]] = None
 
    # A pretty name for the metric.
    # Example: Test WER
@@ -115,7 +115,7 @@ class EvalResult:
 
    # The arguments passed during `Metric.compute()`.
    # Example for `bleu`: max_order: 4
-    metric_args: Optional[Dict[str, Any]] = None
+    metric_args: Optional[dict[str, Any]] = None
 
    # Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
    verified: Optional[bool] = None
@@ -195,7 +195,7 @@ class CardData:
        """
        pass
 
-    def to_yaml(self, line_break=None, original_order: Optional[List[str]] = None) -> str:
+    def to_yaml(self, line_break=None, original_order: Optional[list[str]] = None) -> str:
        """Dumps CardData to a YAML block for inclusion in a README.md file.
 
        Args:
@@ -246,9 +246,9 @@ class CardData:
 
 
 def _validate_eval_results(
-    eval_results: Optional[Union[EvalResult, List[EvalResult]]],
+    eval_results: Optional[Union[EvalResult, list[EvalResult]]],
    model_name: Optional[str],
-) -> List[EvalResult]:
+) -> list[EvalResult]:
    if eval_results is None:
        return []
    if isinstance(eval_results, EvalResult):
@@ -266,17 +266,17 @@ class ModelCardData(CardData):
    """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
 
    Args:
-        base_model (`str` or `List[str]`, *optional*):
+        base_model (`str` or `list[str]`, *optional*):
            The identifier of the base model from which the model derives. This is applicable for example if your model is a
            fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs
            if your model derives from multiple models). Defaults to None.
-        datasets (`Union[str, List[str]]`, *optional*):
+        datasets (`Union[str, list[str]]`, *optional*):
            Dataset or list of datasets that were used to train this model. Should be a dataset ID
            found on https://hf.co/datasets. Defaults to None.
-        eval_results (`Union[List[EvalResult], EvalResult]`, *optional*):
+        eval_results (`Union[list[EvalResult], EvalResult]`, *optional*):
            List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided,
            `model_name` is used to as a name on PapersWithCode's leaderboards. Defaults to `None`.
-        language (`Union[str, List[str]]`, *optional*):
+        language (`Union[str, list[str]]`, *optional*):
            Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or
            639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`.
        library_name (`str`, *optional*):
@@ -292,7 +292,7 @@ class ModelCardData(CardData):
        license_link (`str`, *optional*):
            Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`.
            Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead.
-        metrics (`List[str]`, *optional*):
+        metrics (`list[str]`, *optional*):
            List of metrics used to evaluate this model. Should be a metric name that can be found
            at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
        model_name (`str`, *optional*):
@@ -302,7 +302,7 @@ class ModelCardData(CardData):
            then the repo name is used as a default. Defaults to None.
        pipeline_tag (`str`, *optional*):
            The pipeline tag associated with the model. Example: "text-classification".
-        tags (`List[str]`, *optional*):
+        tags (`list[str]`, *optional*):
            List of tags to add to your model that can be used when filtering on the Hugging
            Face Hub. Defaults to None.
        ignore_metadata_errors (`str`):
@@ -329,18 +329,18 @@ class ModelCardData(CardData):
    def __init__(
        self,
        *,
-        base_model: Optional[Union[str, List[str]]] = None,
-        datasets: Optional[Union[str, List[str]]] = None,
-        eval_results: Optional[List[EvalResult]] = None,
-        language: Optional[Union[str, List[str]]] = None,
+        base_model: Optional[Union[str, list[str]]] = None,
+        datasets: Optional[Union[str, list[str]]] = None,
+        eval_results: Optional[list[EvalResult]] = None,
+        language: Optional[Union[str, list[str]]] = None,
        library_name: Optional[str] = None,
        license: Optional[str] = None,
        license_name: Optional[str] = None,
        license_link: Optional[str] = None,
-        metrics: Optional[List[str]] = None,
+        metrics: Optional[list[str]] = None,
        model_name: Optional[str] = None,
        pipeline_tag: Optional[str] = None,
-        tags: Optional[List[str]] = None,
+        tags: Optional[list[str]] = None,
        ignore_metadata_errors: bool = False,
        **kwargs,
    ):
@@ -395,58 +395,58 @@ class DatasetCardData(CardData):
    """Dataset Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
 
    Args:
-        language (`List[str]`, *optional*):
+        language (`list[str]`, *optional*):
            Language of dataset's data or metadata. It must be an ISO 639-1, 639-2 or
            639-3 code (two/three letters), or a special value like "code", "multilingual".
-        license (`Union[str, List[str]]`, *optional*):
+        license (`Union[str, list[str]]`, *optional*):
            License(s) of this dataset. Example: apache-2.0 or any license from
            https://huggingface.co/docs/hub/repositories-licenses.
-        annotations_creators (`Union[str, List[str]]`, *optional*):
+        annotations_creators (`Union[str, list[str]]`, *optional*):
            How the annotations for the dataset were created.
            Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'no-annotation', 'other'.
-        language_creators (`Union[str, List[str]]`, *optional*):
+        language_creators (`Union[str, list[str]]`, *optional*):
            How the text-based data in the dataset was created.
            Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'other'
-        multilinguality (`Union[str, List[str]]`, *optional*):
+        multilinguality (`Union[str, list[str]]`, *optional*):
            Whether the dataset is multilingual.
            Options are: 'monolingual', 'multilingual', 'translation', 'other'.
-        size_categories (`Union[str, List[str]]`, *optional*):
+        size_categories (`Union[str, list[str]]`, *optional*):
            The number of examples in the dataset. Options are: 'n<1K', '1K<n<10K', '10K<n<100K',
            '100K<n<1M', '1M<n<10M', '10M<n<100M', '100M<n<1B', '1B<n<10B', '10B<n<100B', '100B<n<1T', 'n>1T', and 'other'.
-        source_datasets (`List[str]]`, *optional*):
+        source_datasets (`list[str]]`, *optional*):
            Indicates whether the dataset is an original dataset or extended from another existing dataset.
            Options are: 'original' and 'extended'.
-        task_categories (`Union[str, List[str]]`, *optional*):
+        task_categories (`Union[str, list[str]]`, *optional*):
            What categories of task does the dataset support?
-        task_ids (`Union[str, List[str]]`, *optional*):
+        task_ids (`Union[str, list[str]]`, *optional*):
            What specific tasks does the dataset support?
        paperswithcode_id (`str`, *optional*):
            ID of the dataset on PapersWithCode.
        pretty_name (`str`, *optional*):
            A more human-readable name for the dataset. (ex. "Cats vs. Dogs")
-        train_eval_index (`Dict`, *optional*):
+        train_eval_index (`dict`, *optional*):
            A dictionary that describes the necessary spec for doing evaluation on the Hub.
            If not provided, it will be gathered from the 'train-eval-index' key of the kwargs.
-        config_names (`Union[str, List[str]]`, *optional*):
+        config_names (`Union[str, list[str]]`, *optional*):
            A list of the available dataset configs for the dataset.
    """
 
    def __init__(
        self,
        *,
-        language: Optional[Union[str, List[str]]] = None,
-        license: Optional[Union[str, List[str]]] = None,
-        annotations_creators: Optional[Union[str, List[str]]] = None,
-        language_creators: Optional[Union[str, List[str]]] = None,
-        multilinguality: Optional[Union[str, List[str]]] = None,
-        size_categories: Optional[Union[str, List[str]]] = None,
-        source_datasets: Optional[List[str]] = None,
-        task_categories: Optional[Union[str, List[str]]] = None,
-        task_ids: Optional[Union[str, List[str]]] = None,
+        language: Optional[Union[str, list[str]]] = None,
+        license: Optional[Union[str, list[str]]] = None,
+        annotations_creators: Optional[Union[str, list[str]]] = None,
+        language_creators: Optional[Union[str, list[str]]] = None,
+        multilinguality: Optional[Union[str, list[str]]] = None,
+        size_categories: Optional[Union[str, list[str]]] = None,
+        source_datasets: Optional[list[str]] = None,
+        task_categories: Optional[Union[str, list[str]]] = None,
+        task_ids: Optional[Union[str, list[str]]] = None,
        paperswithcode_id: Optional[str] = None,
        pretty_name: Optional[str] = None,
-        train_eval_index: Optional[Dict] = None,
-        config_names: Optional[Union[str, List[str]]] = None,
+        train_eval_index: Optional[dict] = None,
+        config_names: Optional[Union[str, list[str]]] = None,
        ignore_metadata_errors: bool = False,
        **kwargs,
    ):
@@ -495,11 +495,11 @@ class SpaceCardData(CardData):
            https://huggingface.co/docs/hub/repositories-licenses.
        duplicated_from (`str`, *optional*)
            ID of the original Space if this is a duplicated Space.
-        models (List[`str`], *optional*)
+        models (list[`str`], *optional*)
            List of models related to this Space. Should be a dataset ID found on https://hf.co/models.
-        datasets (`List[str]`, *optional*)
+        datasets (`list[str]`, *optional*)
            List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets.
-        tags (`List[str]`, *optional*)
+        tags (`list[str]`, *optional*)
            List of tags to add to your Space that can be used when filtering on the Hub.
        ignore_metadata_errors (`str`):
            If True, errors while parsing the metadata section will be ignored. Some information might be lost during
@@ -532,9 +532,9 @@ class SpaceCardData(CardData):
        app_port: Optional[int] = None,
        license: Optional[str] = None,
        duplicated_from: Optional[str] = None,
-        models: Optional[List[str]] = None,
-        datasets: Optional[List[str]] = None,
-        tags: Optional[List[str]] = None,
+        models: Optional[list[str]] = None,
+        datasets: Optional[list[str]] = None,
+        tags: Optional[list[str]] = None,
        ignore_metadata_errors: bool = False,
        **kwargs,
    ):
@@ -552,14 +552,14 @@ class SpaceCardData(CardData):
        super().__init__(**kwargs)
 
 
-def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, List[EvalResult]]:
+def model_index_to_eval_results(model_index: list[dict[str, Any]]) -> tuple[str, list[EvalResult]]:
    """Takes in a model index and returns the model name and a list of `huggingface_hub.EvalResult` objects.
 
    A detailed spec of the model index can be found here:
    https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
 
    Args:
-        model_index (`List[Dict[str, Any]]`):
+        model_index (`list[dict[str, Any]]`):
            A model index data structure, likely coming from a README.md file on the
            Hugging Face Hub.
 
@@ -567,7 +567,7 @@ def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, List[EvalResult]]:
        model_name (`str`):
            The name of the model as found in the model index. This is used as the
            identifier for the model on leaderboards like PapersWithCode.
-        eval_results (`List[EvalResult]`):
+        eval_results (`list[EvalResult]`):
            A list of `huggingface_hub.EvalResult` objects containing the metrics
            reported in the provided model_index.
 
@@ -668,7 +668,7 @@ def _remove_none(obj):
        return obj
 
 
-def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
+def eval_results_to_model_index(model_name: str, eval_results: list[EvalResult]) -> list[dict[str, Any]]:
    """Takes in given model name and list of `huggingface_hub.EvalResult` and returns a
    valid model-index that will be compatible with the format expected by the
    Hugging Face Hub.
@@ -677,12 +677,12 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
        model_name (`str`):
            Name of the model (ex. "my-cool-model"). This is used as the identifier
            for the model on leaderboards like PapersWithCode.
-        eval_results (`List[EvalResult]`):
+        eval_results (`list[EvalResult]`):
            List of `huggingface_hub.EvalResult` objects containing the metrics to be
            reported in the model-index.
 
    Returns:
-        model_index (`List[Dict[str, Any]]`): The eval_results converted to a model-index.
+        model_index (`list[dict[str, Any]]`): The eval_results converted to a model-index.
 
    Example:
        ```python
@@ -705,7 +705,7 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
 
    # Metrics are reported on a unique task-and-dataset basis.
    # Here, we make a map of those pairs and the associated EvalResults.
-    task_and_ds_types_map: Dict[Any, List[EvalResult]] = defaultdict(list)
+    task_and_ds_types_map: dict[Any, list[EvalResult]] = defaultdict(list)
    for eval_result in eval_results:
        task_and_ds_types_map[eval_result.unique_identifier].append(eval_result)
 
@@ -760,7 +760,7 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
    return _remove_none(model_index)
 
 
-def _to_unique_list(tags: Optional[List[str]]) -> Optional[List[str]]:
+def _to_unique_list(tags: Optional[list[str]]) -> Optional[list[str]]:
    if tags is None:
        return tags
    unique_tags = []  # make tags unique + keep order explicitly
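The changes in repocard_data.py are annotation and docstring updates only (typing.Dict/List/Tuple replaced by built-in generics); accepted values and behavior are unchanged. A short usage sketch with illustrative values (not taken from the diff):

    from huggingface_hub import EvalResult, ModelCardData

    data = ModelCardData(
        language=["en", "fr"],                 # plain built-in lists match the new annotations
        license="apache-2.0",
        tags=["text-classification", "demo"],
        eval_results=[
            EvalResult(
                task_type="text-classification",
                dataset_type="imdb",
                dataset_name="IMDb",
                metric_type="accuracy",
                metric_value=0.91,
            )
        ],
        model_name="my-demo-model",            # required whenever eval_results is set
    )
    print(data.to_yaml())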
huggingface_hub/serialization/__init__.py
CHANGED
@@ -15,7 +15,6 @@
 """Contains helpers to serialize tensors."""
 
 from ._base import StateDictSplit, split_state_dict_into_shards_factory
-from ._tensorflow import get_tf_storage_size, split_tf_state_dict_into_shards
 from ._torch import (
    get_torch_storage_id,
    get_torch_storage_size,
huggingface_hub/serialization/_base.py
CHANGED
@@ -14,7 +14,7 @@
 """Contains helpers to split tensors into shards."""
 
 from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List, Optional, TypeVar, Union
+from typing import Any, Callable, Optional, TypeVar, Union
 
 from .. import logging
 
@@ -38,16 +38,16 @@ logger = logging.get_logger(__file__)
 @dataclass
 class StateDictSplit:
    is_sharded: bool = field(init=False)
-    metadata: Dict[str, Any]
-    filename_to_tensors: Dict[str, List[str]]
-    tensor_to_filename: Dict[str, str]
+    metadata: dict[str, Any]
+    filename_to_tensors: dict[str, list[str]]
+    tensor_to_filename: dict[str, str]
 
    def __post_init__(self):
        self.is_sharded = len(self.filename_to_tensors) > 1
 
 
 def split_state_dict_into_shards_factory(
-    state_dict: Dict[str, TensorT],
+    state_dict: dict[str, TensorT],
    *,
    get_storage_size: TensorSizeFn_T,
    filename_pattern: str,
@@ -70,7 +70,7 @@ def split_state_dict_into_shards_factory(
    </Tip>
 
    Args:
-        state_dict (`Dict[str, Tensor]`):
+        state_dict (`dict[str, Tensor]`):
            The state dictionary to save.
        get_storage_size (`Callable[[Tensor], int]`):
            A function that returns the size of a tensor when saved on disk in bytes.
@@ -87,10 +87,10 @@ def split_state_dict_into_shards_factory(
    Returns:
        [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
    """
-    storage_id_to_tensors: Dict[Any, List[str]] = {}
+    storage_id_to_tensors: dict[Any, list[str]] = {}
 
-    shard_list: List[Dict[str, TensorT]] = []
-    current_shard: Dict[str, TensorT] = {}
+    shard_list: list[dict[str, TensorT]] = []
+    current_shard: dict[str, TensorT] = {}
    current_shard_size = 0
    total_size = 0
 
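`split_state_dict_into_shards_factory` keeps its keyword-only interface; only the annotations change (and the TensorFlow helpers are no longer re-exported). A toy sketch of the factory with list-based stand-in tensors; the 4-bytes-per-element size function and the max_shard_size value are assumptions for the demo, not taken from this diff:

    from huggingface_hub.serialization import split_state_dict_into_shards_factory

    # Stand-in "tensors": plain lists of floats, assumed to take 4 bytes per element on disk.
    state_dict = {f"layer_{i}.weight": [0.0] * 1024 for i in range(8)}

    split = split_state_dict_into_shards_factory(
        state_dict,
        get_storage_size=lambda t: len(t) * 4,        # bytes each tensor would occupy
        filename_pattern="model{suffix}.safetensors",
        max_shard_size=10_000,                        # small limit to force several shards
    )
    print(split.is_sharded)                            # True: the toy state dict is split
    print(list(split.filename_to_tensors))             # shard filename -> tensor names
    print(split.tensor_to_filename["layer_0.weight"])  # which shard holds a given tensor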