huggingface-hub 0.35.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +28 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +15 -15
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +80 -3
- huggingface_hub/cli/auth.py +104 -150
- huggingface_hub/cli/cache.py +102 -126
- huggingface_hub/cli/download.py +93 -110
- huggingface_hub/cli/hf.py +37 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +158 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -212
- huggingface_hub/cli/upload_large_folder.py +90 -105
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +11 -11
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -22
- huggingface_hub/errors.py +43 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +154 -253
- huggingface_hub/hf_api.py +329 -558
- huggingface_hub/hf_file_system.py +104 -62
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +178 -163
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +219 -259
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +2 -13
- huggingface_hub/inference/_providers/_common.py +24 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +24 -33
- huggingface_hub/repocard.py +16 -17
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +369 -209
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +15 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/METADATA +17 -26
- huggingface_hub-1.0.0rc1.dist-info/RECORD +161 -0
- huggingface_hub/inference/_providers/publicai.py +0 -6
- huggingface_hub/inference/_providers/scaleway.py +0 -28
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.1.dist-info/RECORD +0 -168
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/top_level.txt +0 -0
huggingface_hub/repocard.py
CHANGED
@@ -1,9 +1,8 @@
 import os
 import re
 from pathlib import Path
-from typing import Any, …
+from typing import Any, Literal, Optional, Union

-import requests
 import yaml

 from huggingface_hub.file_download import hf_hub_download
@@ -17,7 +16,7 @@ from huggingface_hub.repocard_data import (
     eval_results_to_model_index,
     model_index_to_eval_results,
 )
-from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
+from huggingface_hub.utils import HfHubHTTPError, get_session, hf_raise_for_status, is_jinja_available, yaml_dump

 from . import constants
 from .errors import EntryNotFoundError
@@ -204,7 +203,7 @@ class RepoCard:

         - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
           if the card fails validation checks.
-        - [`…
+        - [`HfHubHTTPError`]
           if the request to the Hub API fails for any other reason.

         </Tip>
@@ -220,11 +219,11 @@ class RepoCard:
         headers = {"Accept": "text/plain"}

         try:
-            …
-            …
-        except …
-            if …
-                raise ValueError(…
+            response = get_session().post("https://huggingface.co/api/validate-yaml", json=body, headers=headers)
+            hf_raise_for_status(response)
+        except HfHubHTTPError as exc:
+            if response.status_code == 400:
+                raise ValueError(response.text)
             else:
                 raise exc

@@ -336,7 +335,7 @@ class RepoCard:


 class ModelCard(RepoCard):
-    card_data_class = ModelCardData
+    card_data_class = ModelCardData  # type: ignore[assignment]
     default_template_path = TEMPLATE_MODELCARD_PATH
     repo_type = "model"

@@ -417,7 +416,7 @@ class ModelCard(RepoCard):


 class DatasetCard(RepoCard):
-    card_data_class = DatasetCardData
+    card_data_class = DatasetCardData  # type: ignore[assignment]
     default_template_path = TEMPLATE_DATASETCARD_PATH
     repo_type = "dataset"

@@ -482,7 +481,7 @@ class DatasetCard(RepoCard):


 class SpaceCard(RepoCard):
-    card_data_class = SpaceCardData
+    card_data_class = SpaceCardData  # type: ignore[assignment]
     default_template_path = TEMPLATE_MODELCARD_PATH
     repo_type = "space"

@@ -508,7 +507,7 @@ def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]:  # n
     return "\n"


-def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
+def metadata_load(local_path: Union[str, Path]) -> Optional[dict]:
     content = Path(local_path).read_text()
     match = REGEX_YAML_BLOCK.search(content)
     if match:
@@ -521,7 +520,7 @@ def metadata_load(local_path: Union[str, Path]) -> Optional[Dict]:
     return None


-def metadata_save(local_path: Union[str, Path], data: …
+def metadata_save(local_path: Union[str, Path], data: dict) -> None:
     """
     Save the metadata dict in the upper YAML part Trying to preserve newlines as
     in the existing file. Docs about open() with newline="" parameter:
@@ -569,7 +568,7 @@ def metadata_eval_result(
     dataset_split: Optional[str] = None,
     dataset_revision: Optional[str] = None,
     metrics_verification_token: Optional[str] = None,
-) -> …
+) -> dict:
     """
     Creates a metadata dict with the result from a model evaluated on a dataset.

@@ -684,7 +683,7 @@ def metadata_eval_result(
 @validate_hf_hub_args
 def metadata_update(
     repo_id: str,
-    metadata: …
+    metadata: dict,
     *,
     repo_type: Optional[str] = None,
     overwrite: bool = False,
@@ -752,7 +751,7 @@ def metadata_update(
     commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"

     # Card class given repo_type
-    card_class: …
+    card_class: type[RepoCard]
     if repo_type is None or repo_type == "model":
         card_class = ModelCard
     elif repo_type == "dataset":
huggingface_hub/repocard_data.py
CHANGED
@@ -1,7 +1,7 @@
 import copy
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Any, …
+from typing import Any, Optional, Union

 from huggingface_hub.utils import logging, yaml_dump

@@ -38,7 +38,7 @@ class EvalResult:
         dataset_revision (`str`, *optional*):
             The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
             Example: 5503434ddd753f426f4b38109466949a1217c2bb
-        dataset_args (`…
+        dataset_args (`dict[str, Any]`, *optional*):
             The arguments passed during `Metric.compute()`. Example for `bleu`: `{"max_order": 4}`
         metric_name (`str`, *optional*):
             A pretty name for the metric. Example: "Test WER".
@@ -46,7 +46,7 @@ class EvalResult:
             The name of the metric configuration used in `load_metric()`.
             Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
             See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
-        metric_args (`…
+        metric_args (`dict[str, Any]`, *optional*):
             The arguments passed during `Metric.compute()`. Example for `bleu`: max_order: 4
         verified (`bool`, *optional*):
             Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
@@ -102,7 +102,7 @@ class EvalResult:

     # The arguments passed during `Metric.compute()`.
     # Example for `bleu`: max_order: 4
-    dataset_args: Optional[…
+    dataset_args: Optional[dict[str, Any]] = None

     # A pretty name for the metric.
     # Example: Test WER
@@ -115,7 +115,7 @@ class EvalResult:

     # The arguments passed during `Metric.compute()`.
     # Example for `bleu`: max_order: 4
-    metric_args: Optional[…
+    metric_args: Optional[dict[str, Any]] = None

     # Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
     verified: Optional[bool] = None
@@ -195,7 +195,7 @@ class CardData:
         """
         pass

-    def to_yaml(self, line_break=None, original_order: Optional[…
+    def to_yaml(self, line_break=None, original_order: Optional[list[str]] = None) -> str:
        """Dumps CardData to a YAML block for inclusion in a README.md file.

        Args:
@@ -246,9 +246,9 @@ class CardData:


 def _validate_eval_results(
-    eval_results: Optional[Union[EvalResult, …
+    eval_results: Optional[Union[EvalResult, list[EvalResult]]],
     model_name: Optional[str],
-) -> …
+) -> list[EvalResult]:
     if eval_results is None:
         return []
     if isinstance(eval_results, EvalResult):
@@ -266,17 +266,17 @@ class ModelCardData(CardData):
    """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md

    Args:
-        base_model (`str` or `…
+        base_model (`str` or `list[str]`, *optional*):
            The identifier of the base model from which the model derives. This is applicable for example if your model is a
            fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs
            if your model derives from multiple models). Defaults to None.
-        datasets (`Union[str, …
+        datasets (`Union[str, list[str]]`, *optional*):
            Dataset or list of datasets that were used to train this model. Should be a dataset ID
            found on https://hf.co/datasets. Defaults to None.
-        eval_results (`Union[…
+        eval_results (`Union[list[EvalResult], EvalResult]`, *optional*):
            List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided,
            `model_name` is used to as a name on PapersWithCode's leaderboards. Defaults to `None`.
-        language (`Union[str, …
+        language (`Union[str, list[str]]`, *optional*):
            Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or
            639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`.
        library_name (`str`, *optional*):
@@ -292,7 +292,7 @@ class ModelCardData(CardData):
        license_link (`str`, *optional*):
            Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`.
            Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead.
-        metrics (`…
+        metrics (`list[str]`, *optional*):
            List of metrics used to evaluate this model. Should be a metric name that can be found
            at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
        model_name (`str`, *optional*):
@@ -302,7 +302,7 @@ class ModelCardData(CardData):
            then the repo name is used as a default. Defaults to None.
        pipeline_tag (`str`, *optional*):
            The pipeline tag associated with the model. Example: "text-classification".
-        tags (`…
+        tags (`list[str]`, *optional*):
            List of tags to add to your model that can be used when filtering on the Hugging
            Face Hub. Defaults to None.
        ignore_metadata_errors (`str`):
@@ -329,18 +329,18 @@ class ModelCardData(CardData):
     def __init__(
         self,
         *,
-        base_model: Optional[Union[str, …
-        datasets: Optional[Union[str, …
-        eval_results: Optional[…
-        language: Optional[Union[str, …
+        base_model: Optional[Union[str, list[str]]] = None,
+        datasets: Optional[Union[str, list[str]]] = None,
+        eval_results: Optional[list[EvalResult]] = None,
+        language: Optional[Union[str, list[str]]] = None,
         library_name: Optional[str] = None,
         license: Optional[str] = None,
         license_name: Optional[str] = None,
         license_link: Optional[str] = None,
-        metrics: Optional[…
+        metrics: Optional[list[str]] = None,
         model_name: Optional[str] = None,
         pipeline_tag: Optional[str] = None,
-        tags: Optional[…
+        tags: Optional[list[str]] = None,
         ignore_metadata_errors: bool = False,
         **kwargs,
     ):
@@ -395,58 +395,58 @@ class DatasetCardData(CardData):
    """Dataset Card Metadata that is used by Hugging Face Hub when included at the top of your README.md

    Args:
-        language (`…
+        language (`list[str]`, *optional*):
            Language of dataset's data or metadata. It must be an ISO 639-1, 639-2 or
            639-3 code (two/three letters), or a special value like "code", "multilingual".
-        license (`Union[str, …
+        license (`Union[str, list[str]]`, *optional*):
            License(s) of this dataset. Example: apache-2.0 or any license from
            https://huggingface.co/docs/hub/repositories-licenses.
-        annotations_creators (`Union[str, …
+        annotations_creators (`Union[str, list[str]]`, *optional*):
            How the annotations for the dataset were created.
            Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'no-annotation', 'other'.
-        language_creators (`Union[str, …
+        language_creators (`Union[str, list[str]]`, *optional*):
            How the text-based data in the dataset was created.
            Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'other'
-        multilinguality (`Union[str, …
+        multilinguality (`Union[str, list[str]]`, *optional*):
            Whether the dataset is multilingual.
            Options are: 'monolingual', 'multilingual', 'translation', 'other'.
-        size_categories (`Union[str, …
+        size_categories (`Union[str, list[str]]`, *optional*):
            The number of examples in the dataset. Options are: 'n<1K', '1K<n<10K', '10K<n<100K',
            '100K<n<1M', '1M<n<10M', '10M<n<100M', '100M<n<1B', '1B<n<10B', '10B<n<100B', '100B<n<1T', 'n>1T', and 'other'.
-        source_datasets (`…
+        source_datasets (`list[str]]`, *optional*):
            Indicates whether the dataset is an original dataset or extended from another existing dataset.
            Options are: 'original' and 'extended'.
-        task_categories (`Union[str, …
+        task_categories (`Union[str, list[str]]`, *optional*):
            What categories of task does the dataset support?
-        task_ids (`Union[str, …
+        task_ids (`Union[str, list[str]]`, *optional*):
            What specific tasks does the dataset support?
        paperswithcode_id (`str`, *optional*):
            ID of the dataset on PapersWithCode.
        pretty_name (`str`, *optional*):
            A more human-readable name for the dataset. (ex. "Cats vs. Dogs")
-        train_eval_index (`…
+        train_eval_index (`dict`, *optional*):
            A dictionary that describes the necessary spec for doing evaluation on the Hub.
            If not provided, it will be gathered from the 'train-eval-index' key of the kwargs.
-        config_names (`Union[str, …
+        config_names (`Union[str, list[str]]`, *optional*):
            A list of the available dataset configs for the dataset.
    """

     def __init__(
         self,
         *,
-        language: Optional[Union[str, …
-        license: Optional[Union[str, …
-        annotations_creators: Optional[Union[str, …
-        language_creators: Optional[Union[str, …
-        multilinguality: Optional[Union[str, …
-        size_categories: Optional[Union[str, …
-        source_datasets: Optional[…
-        task_categories: Optional[Union[str, …
-        task_ids: Optional[Union[str, …
+        language: Optional[Union[str, list[str]]] = None,
+        license: Optional[Union[str, list[str]]] = None,
+        annotations_creators: Optional[Union[str, list[str]]] = None,
+        language_creators: Optional[Union[str, list[str]]] = None,
+        multilinguality: Optional[Union[str, list[str]]] = None,
+        size_categories: Optional[Union[str, list[str]]] = None,
+        source_datasets: Optional[list[str]] = None,
+        task_categories: Optional[Union[str, list[str]]] = None,
+        task_ids: Optional[Union[str, list[str]]] = None,
         paperswithcode_id: Optional[str] = None,
         pretty_name: Optional[str] = None,
-        train_eval_index: Optional[…
-        config_names: Optional[Union[str, …
+        train_eval_index: Optional[dict] = None,
+        config_names: Optional[Union[str, list[str]]] = None,
         ignore_metadata_errors: bool = False,
         **kwargs,
     ):
@@ -495,11 +495,11 @@ class SpaceCardData(CardData):
            https://huggingface.co/docs/hub/repositories-licenses.
        duplicated_from (`str`, *optional*)
            ID of the original Space if this is a duplicated Space.
-        models (…
+        models (list[`str`], *optional*)
            List of models related to this Space. Should be a dataset ID found on https://hf.co/models.
-        datasets (`…
+        datasets (`list[str]`, *optional*)
            List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets.
-        tags (`…
+        tags (`list[str]`, *optional*)
            List of tags to add to your Space that can be used when filtering on the Hub.
        ignore_metadata_errors (`str`):
            If True, errors while parsing the metadata section will be ignored. Some information might be lost during
@@ -532,9 +532,9 @@ class SpaceCardData(CardData):
         app_port: Optional[int] = None,
         license: Optional[str] = None,
         duplicated_from: Optional[str] = None,
-        models: Optional[…
-        datasets: Optional[…
-        tags: Optional[…
+        models: Optional[list[str]] = None,
+        datasets: Optional[list[str]] = None,
+        tags: Optional[list[str]] = None,
         ignore_metadata_errors: bool = False,
         **kwargs,
     ):
@@ -552,14 +552,14 @@ class SpaceCardData(CardData):
         super().__init__(**kwargs)


-def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, …
+def model_index_to_eval_results(model_index: list[dict[str, Any]]) -> tuple[str, list[EvalResult]]:
    """Takes in a model index and returns the model name and a list of `huggingface_hub.EvalResult` objects.

    A detailed spec of the model index can be found here:
    https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1

    Args:
-        model_index (`…
+        model_index (`list[dict[str, Any]]`):
            A model index data structure, likely coming from a README.md file on the
            Hugging Face Hub.

@@ -567,7 +567,7 @@ def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str,
        model_name (`str`):
            The name of the model as found in the model index. This is used as the
            identifier for the model on leaderboards like PapersWithCode.
-        eval_results (`…
+        eval_results (`list[EvalResult]`):
            A list of `huggingface_hub.EvalResult` objects containing the metrics
            reported in the provided model_index.

@@ -668,7 +668,7 @@ def _remove_none(obj):
     return obj


-def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> …
+def eval_results_to_model_index(model_name: str, eval_results: list[EvalResult]) -> list[dict[str, Any]]:
    """Takes in given model name and list of `huggingface_hub.EvalResult` and returns a
    valid model-index that will be compatible with the format expected by the
    Hugging Face Hub.
@@ -677,12 +677,12 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult])
        model_name (`str`):
            Name of the model (ex. "my-cool-model"). This is used as the identifier
            for the model on leaderboards like PapersWithCode.
-        eval_results (`…
+        eval_results (`list[EvalResult]`):
            List of `huggingface_hub.EvalResult` objects containing the metrics to be
            reported in the model-index.

    Returns:
-        model_index (`…
+        model_index (`list[dict[str, Any]]`): The eval_results converted to a model-index.

    Example:
    ```python
@@ -705,7 +705,7 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult])

     # Metrics are reported on a unique task-and-dataset basis.
     # Here, we make a map of those pairs and the associated EvalResults.
-    task_and_ds_types_map: …
+    task_and_ds_types_map: dict[Any, list[EvalResult]] = defaultdict(list)
     for eval_result in eval_results:
         task_and_ds_types_map[eval_result.unique_identifier].append(eval_result)

@@ -760,7 +760,7 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult])
     return _remove_none(model_index)


-def _to_unique_list(tags: Optional[…
+def _to_unique_list(tags: Optional[list[str]]) -> Optional[list[str]]:
     if tags is None:
         return tags
     unique_tags = []  # make tags unique + keep order explicitly
huggingface_hub/serialization/__init__.py
CHANGED

@@ -15,7 +15,6 @@
 """Contains helpers to serialize tensors."""

 from ._base import StateDictSplit, split_state_dict_into_shards_factory
-from ._tensorflow import get_tf_storage_size, split_tf_state_dict_into_shards
 from ._torch import (
     get_torch_storage_id,
     get_torch_storage_size,
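With `_tensorflow` dropped from the package (see the deleted `serialization/_tensorflow.py` in the file list above), only the framework-agnostic and torch helpers remain re-exported. A quick sketch using just the names visible in this diff:

```python
# These imports should still resolve in 1.0.0rc1, per the import block above.
from huggingface_hub.serialization import (
    StateDictSplit,
    get_torch_storage_id,
    get_torch_storage_size,
    split_state_dict_into_shards_factory,
)

# Gone in 1.0.0rc1 (module deleted per the file list):
# from huggingface_hub.serialization import split_tf_state_dict_into_shards  # ImportError
```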
huggingface_hub/serialization/_base.py
CHANGED

@@ -14,7 +14,7 @@
 """Contains helpers to split tensors into shards."""

 from dataclasses import dataclass, field
-from typing import Any, Callable, …
+from typing import Any, Callable, Optional, TypeVar, Union

 from .. import logging

@@ -38,16 +38,16 @@ logger = logging.get_logger(__file__)
 @dataclass
 class StateDictSplit:
     is_sharded: bool = field(init=False)
-    metadata: …
-    filename_to_tensors: …
-    tensor_to_filename: …
+    metadata: dict[str, Any]
+    filename_to_tensors: dict[str, list[str]]
+    tensor_to_filename: dict[str, str]

     def __post_init__(self):
         self.is_sharded = len(self.filename_to_tensors) > 1


 def split_state_dict_into_shards_factory(
-    state_dict: …
+    state_dict: dict[str, TensorT],
     *,
     get_storage_size: TensorSizeFn_T,
     filename_pattern: str,
@@ -70,7 +70,7 @@ def split_state_dict_into_shards_factory(
     </Tip>

     Args:
-        state_dict (`…
+        state_dict (`dict[str, Tensor]`):
            The state dictionary to save.
        get_storage_size (`Callable[[Tensor], int]`):
            A function that returns the size of a tensor when saved on disk in bytes.
@@ -87,10 +87,10 @@ def split_state_dict_into_shards_factory(
     Returns:
         [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
     """
-    storage_id_to_tensors: …
+    storage_id_to_tensors: dict[Any, list[str]] = {}

-    shard_list: …
-    current_shard: …
+    shard_list: list[dict[str, TensorT]] = []
+    current_shard: dict[str, TensorT] = {}
     current_shard_size = 0
     total_size = 0
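`split_state_dict_into_shards_factory` keeps its behavior; only its annotations moved to `dict[...]`/`list[...]`. A framework-free sketch of the factory, using `bytes` blobs as stand-in tensors and `len` as the size function; the `max_shard_size` keyword and the `{suffix}` placeholder follow the function's documented interface rather than lines shown in this diff:

```python
from huggingface_hub.serialization import split_state_dict_into_shards_factory

# Two fake 800-byte "tensors"; a 1000-byte shard limit forces a split.
state_dict = {"layer.0.weight": b"\x00" * 800, "layer.1.weight": b"\x00" * 800}

split = split_state_dict_into_shards_factory(
    state_dict,
    get_storage_size=len,                  # Callable[[Tensor], int], here just the byte length
    filename_pattern="model{suffix}.bin",  # "{suffix}" is filled in per shard
    max_shard_size=1000,
)
print(split.is_sharded)            # True
print(split.filename_to_tensors)   # dict[str, list[str]]
print(split.tensor_to_filename)    # dict[str, str]
```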
huggingface_hub/serialization/_dduf.py
CHANGED

@@ -7,7 +7,7 @@ import zipfile
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, …
+from typing import Any, Generator, Iterable, Union

 from ..errors import DDUFCorruptedFileError, DDUFExportError, DDUFInvalidEntryNameError

@@ -87,7 +87,7 @@ class DDUFEntry:
         return f.read(self.length).decode(encoding=encoding)


-def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
+def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> dict[str, DDUFEntry]:
     """
     Read a DDUF file and return a dictionary of entries.

@@ -98,7 +98,7 @@ def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
            The path to the DDUF file to read.

    Returns:
-        `…
+        `dict[str, DDUFEntry]`:
            A dictionary of [`DDUFEntry`] indexed by filename.

    Raises:
@@ -157,7 +157,7 @@ def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:


 def export_entries_as_dduf(
-    dduf_path: Union[str, os.PathLike], entries: Iterable[…
+    dduf_path: Union[str, os.PathLike], entries: Iterable[tuple[str, Union[str, Path, bytes]]]
 ) -> None:
     """Write a DDUF file from an iterable of entries.

@@ -167,7 +167,7 @@ def export_entries_as_dduf(
    Args:
        dduf_path (`str` or `os.PathLike`):
            The path to the DDUF file to write.
-        entries (`Iterable[…
+        entries (`Iterable[tuple[str, Union[str, Path, bytes]]]`):
            An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
            The filename should be the path to the file in the DDUF archive.
            The content can be a string or a pathlib.Path representing a path to a file on the local disk or directly the content as bytes.
@@ -201,7 +201,7 @@ def export_entries_as_dduf(
    >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
    ... # ... do some work with the pipeline

-    >>> def as_entries(pipe: DiffusionPipeline) -> Generator[…
+    >>> def as_entries(pipe: DiffusionPipeline) -> Generator[tuple[str, bytes], None, None]:
    ...     # Build an generator that yields the entries to add to the DDUF file.
    ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
    ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
@@ -267,7 +267,7 @@ def export_folder_as_dduf(dduf_path: Union[str, os.PathLike], folder_path: Union
     """
     folder_path = Path(folder_path)

-    def _iterate_over_folder() -> Iterable[…
+    def _iterate_over_folder() -> Iterable[tuple[str, Path]]:
         for path in Path(folder_path).glob("**/*"):
             if not path.is_file():
                 continue