huggingface-hub 0.29.0rc2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +160 -46
- huggingface_hub/_commit_api.py +277 -71
- huggingface_hub/_commit_scheduler.py +15 -15
- huggingface_hub/_inference_endpoints.py +33 -22
- huggingface_hub/_jobs_api.py +301 -0
- huggingface_hub/_local_folder.py +18 -3
- huggingface_hub/_login.py +31 -63
- huggingface_hub/_oauth.py +460 -0
- huggingface_hub/_snapshot_download.py +241 -81
- huggingface_hub/_space_api.py +18 -10
- huggingface_hub/_tensorboard_logger.py +15 -19
- huggingface_hub/_upload_large_folder.py +196 -76
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +15 -25
- huggingface_hub/{commands → cli}/__init__.py +1 -15
- huggingface_hub/cli/_cli_utils.py +173 -0
- huggingface_hub/cli/auth.py +147 -0
- huggingface_hub/cli/cache.py +841 -0
- huggingface_hub/cli/download.py +189 -0
- huggingface_hub/cli/hf.py +60 -0
- huggingface_hub/cli/inference_endpoints.py +377 -0
- huggingface_hub/cli/jobs.py +772 -0
- huggingface_hub/cli/lfs.py +175 -0
- huggingface_hub/cli/repo.py +315 -0
- huggingface_hub/cli/repo_files.py +94 -0
- huggingface_hub/{commands/env.py → cli/system.py} +10 -13
- huggingface_hub/cli/upload.py +294 -0
- huggingface_hub/cli/upload_large_folder.py +117 -0
- huggingface_hub/community.py +20 -12
- huggingface_hub/constants.py +83 -59
- huggingface_hub/dataclasses.py +609 -0
- huggingface_hub/errors.py +99 -30
- huggingface_hub/fastai_utils.py +30 -41
- huggingface_hub/file_download.py +606 -346
- huggingface_hub/hf_api.py +2445 -1132
- huggingface_hub/hf_file_system.py +269 -152
- huggingface_hub/hub_mixin.py +61 -66
- huggingface_hub/inference/_client.py +501 -630
- huggingface_hub/inference/_common.py +133 -121
- huggingface_hub/inference/_generated/_async_client.py +536 -722
- huggingface_hub/inference/_generated/types/__init__.py +6 -1
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +5 -6
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +77 -31
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +8 -2
- huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +11 -11
- huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/__init__.py +0 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
- huggingface_hub/inference/_mcp/agent.py +100 -0
- huggingface_hub/inference/_mcp/cli.py +247 -0
- huggingface_hub/inference/_mcp/constants.py +81 -0
- huggingface_hub/inference/_mcp/mcp_client.py +395 -0
- huggingface_hub/inference/_mcp/types.py +45 -0
- huggingface_hub/inference/_mcp/utils.py +128 -0
- huggingface_hub/inference/_providers/__init__.py +149 -20
- huggingface_hub/inference/_providers/_common.py +160 -37
- huggingface_hub/inference/_providers/black_forest_labs.py +12 -9
- huggingface_hub/inference/_providers/cerebras.py +6 -0
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/inference/_providers/cohere.py +32 -0
- huggingface_hub/inference/_providers/fal_ai.py +231 -22
- huggingface_hub/inference/_providers/featherless_ai.py +38 -0
- huggingface_hub/inference/_providers/fireworks_ai.py +22 -1
- huggingface_hub/inference/_providers/groq.py +9 -0
- huggingface_hub/inference/_providers/hf_inference.py +143 -33
- huggingface_hub/inference/_providers/hyperbolic.py +9 -5
- huggingface_hub/inference/_providers/nebius.py +47 -5
- huggingface_hub/inference/_providers/novita.py +48 -5
- huggingface_hub/inference/_providers/nscale.py +44 -0
- huggingface_hub/inference/_providers/openai.py +25 -0
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +46 -9
- huggingface_hub/inference/_providers/sambanova.py +37 -1
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/together.py +34 -5
- huggingface_hub/inference/_providers/wavespeed.py +138 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +33 -100
- huggingface_hub/repocard.py +34 -38
- huggingface_hub/repocard_data.py +79 -59
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +12 -15
- huggingface_hub/serialization/_dduf.py +8 -8
- huggingface_hub/serialization/_torch.py +69 -69
- huggingface_hub/utils/__init__.py +27 -8
- huggingface_hub/utils/_auth.py +7 -7
- huggingface_hub/utils/_cache_manager.py +92 -147
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +55 -0
- huggingface_hub/utils/_experimental.py +7 -5
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +5 -5
- huggingface_hub/utils/_headers.py +8 -30
- huggingface_hub/utils/_http.py +399 -237
- huggingface_hub/utils/_pagination.py +6 -6
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +74 -22
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +13 -11
- huggingface_hub/utils/_telemetry.py +4 -4
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +55 -74
- huggingface_hub/utils/_verification.py +167 -0
- huggingface_hub/utils/_xet.py +235 -0
- huggingface_hub/utils/_xet_progress_reporting.py +162 -0
- huggingface_hub/utils/insecure_hashlib.py +3 -5
- huggingface_hub/utils/logging.py +8 -11
- huggingface_hub/utils/tqdm.py +33 -4
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -82
- huggingface_hub-1.1.3.dist-info/RECORD +155 -0
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
- huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
- huggingface_hub/commands/delete_cache.py +0 -428
- huggingface_hub/commands/download.py +0 -200
- huggingface_hub/commands/huggingface_cli.py +0 -61
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo_files.py +0 -128
- huggingface_hub/commands/scan_cache.py +0 -181
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -299
- huggingface_hub/commands/upload_large_folder.py +0 -129
- huggingface_hub/commands/user.py +0 -304
- huggingface_hub/commands/version.py +0 -37
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.29.0rc2.dist-info/RECORD +0 -131
- huggingface_hub-0.29.0rc2.dist-info/entry_points.txt +0 -6
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
huggingface_hub/repocard_data.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import copy
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Optional, Union
 
 from huggingface_hub.utils import logging, yaml_dump
 
@@ -38,7 +38,7 @@ class EvalResult:
         dataset_revision (`str`, *optional*):
             The revision (AKA Git Sha) of the dataset used in `load_dataset()`.
             Example: 5503434ddd753f426f4b38109466949a1217c2bb
-        dataset_args (`Dict[str, Any]`, *optional*):
+        dataset_args (`dict[str, Any]`, *optional*):
             The arguments passed during `Metric.compute()`. Example for `bleu`: `{"max_order": 4}`
         metric_name (`str`, *optional*):
             A pretty name for the metric. Example: "Test WER".
@@ -46,7 +46,7 @@ class EvalResult:
             The name of the metric configuration used in `load_metric()`.
             Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
             See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations
-        metric_args (`Dict[str, Any]`, *optional*):
+        metric_args (`dict[str, Any]`, *optional*):
             The arguments passed during `Metric.compute()`. Example for `bleu`: max_order: 4
         verified (`bool`, *optional*):
             Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
@@ -102,7 +102,7 @@ class EvalResult:
 
     # The arguments passed during `Metric.compute()`.
     # Example for `bleu`: max_order: 4
-    dataset_args: Optional[Dict[str, Any]] = None
+    dataset_args: Optional[dict[str, Any]] = None
 
     # A pretty name for the metric.
     # Example: Test WER
@@ -115,7 +115,7 @@ class EvalResult:
 
     # The arguments passed during `Metric.compute()`.
     # Example for `bleu`: max_order: 4
-    metric_args: Optional[Dict[str, Any]] = None
+    metric_args: Optional[dict[str, Any]] = None
 
     # Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
     verified: Optional[bool] = None
```
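The `EvalResult` hunks above only modernize type annotations (`Dict[str, Any]` to `dict[str, Any]`); runtime behavior is unchanged. For reference, a minimal sketch of constructing an `EvalResult`: the values are illustrative, with `metric_args={"max_order": 4}` mirroring the `bleu` example from the docstring.

```python
from huggingface_hub import EvalResult

result = EvalResult(
    task_type="translation",       # required identifiers (illustrative values)
    dataset_type="wmt16",
    dataset_name="WMT16",
    metric_type="bleu",
    metric_value=28.5,             # illustrative score
    metric_args={"max_order": 4},  # now typed as dict[str, Any]
)
```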
```diff
@@ -195,7 +195,7 @@ class CardData:
         """
         pass
 
-    def to_yaml(self, line_break=None, original_order: Optional[List[str]] = None) -> str:
+    def to_yaml(self, line_break=None, original_order: Optional[list[str]] = None) -> str:
        """Dumps CardData to a YAML block for inclusion in a README.md file.
 
         Args:
```
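`to_yaml` itself is unchanged apart from the annotation. A quick sketch of its documented use:

```python
from huggingface_hub import ModelCardData

card_data = ModelCardData(language="en", license="mit", library_name="transformers")
print(card_data.to_yaml())
# language: en
# library_name: transformers
# license: mit
```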
```diff
@@ -245,21 +245,38 @@ class CardData:
         return len(self.__dict__)
 
 
+def _validate_eval_results(
+    eval_results: Optional[Union[EvalResult, list[EvalResult]]],
+    model_name: Optional[str],
+) -> list[EvalResult]:
+    if eval_results is None:
+        return []
+    if isinstance(eval_results, EvalResult):
+        eval_results = [eval_results]
+    if not isinstance(eval_results, list) or not all(isinstance(r, EvalResult) for r in eval_results):
+        raise ValueError(
+            f"`eval_results` should be of type `EvalResult` or a list of `EvalResult`, got {type(eval_results)}."
+        )
+    if model_name is None:
+        raise ValueError("Passing `eval_results` requires `model_name` to be set.")
+    return eval_results
+
+
 class ModelCardData(CardData):
     """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
 
     Args:
-        base_model (`str` or `List[str]`, *optional*):
+        base_model (`str` or `list[str]`, *optional*):
             The identifier of the base model from which the model derives. This is applicable for example if your model is a
             fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs
             if your model derives from multiple models). Defaults to None.
-        datasets (`Union[str, List[str]]`, *optional*):
+        datasets (`Union[str, list[str]]`, *optional*):
             Dataset or list of datasets that were used to train this model. Should be a dataset ID
             found on https://hf.co/datasets. Defaults to None.
-        eval_results (`Union[List[EvalResult], EvalResult]`, *optional*):
+        eval_results (`Union[list[EvalResult], EvalResult]`, *optional*):
             List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided,
             `model_name` is used to as a name on PapersWithCode's leaderboards. Defaults to `None`.
-        language (`Union[str, List[str]]`, *optional*):
+        language (`Union[str, list[str]]`, *optional*):
             Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or
             639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`.
         library_name (`str`, *optional*):
@@ -275,7 +292,7 @@ class ModelCardData(CardData):
         license_link (`str`, *optional*):
             Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`.
             Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead.
-        metrics (`List[str]`, *optional*):
+        metrics (`list[str]`, *optional*):
             List of metrics used to evaluate this model. Should be a metric name that can be found
             at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
         model_name (`str`, *optional*):
@@ -285,7 +302,7 @@ class ModelCardData(CardData):
             then the repo name is used as a default. Defaults to None.
         pipeline_tag (`str`, *optional*):
             The pipeline tag associated with the model. Example: "text-classification".
-        tags (`List[str]`, *optional*):
+        tags (`list[str]`, *optional*):
             List of tags to add to your model that can be used when filtering on the Hugging
             Face Hub. Defaults to None.
         ignore_metadata_errors (`str`):
@@ -312,18 +329,18 @@ class ModelCardData(CardData):
     def __init__(
         self,
         *,
-        base_model: Optional[Union[str, List[str]]] = None,
-        datasets: Optional[Union[str, List[str]]] = None,
-        eval_results: Optional[List[EvalResult]] = None,
-        language: Optional[Union[str, List[str]]] = None,
+        base_model: Optional[Union[str, list[str]]] = None,
+        datasets: Optional[Union[str, list[str]]] = None,
+        eval_results: Optional[list[EvalResult]] = None,
+        language: Optional[Union[str, list[str]]] = None,
         library_name: Optional[str] = None,
         license: Optional[str] = None,
         license_name: Optional[str] = None,
         license_link: Optional[str] = None,
-        metrics: Optional[List[str]] = None,
+        metrics: Optional[list[str]] = None,
         model_name: Optional[str] = None,
         pipeline_tag: Optional[str] = None,
-        tags: Optional[List[str]] = None,
+        tags: Optional[list[str]] = None,
         ignore_metadata_errors: bool = False,
         **kwargs,
     ):
```
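The new `_validate_eval_results` helper centralizes checks that previously lived inline in `ModelCardData.__init__` (see the next hunk). A sketch of the happy path, based on the class's documented example:

```python
from huggingface_hub import EvalResult, ModelCardData

# `model_name` is mandatory whenever `eval_results` is passed.
card_data = ModelCardData(
    language="en",
    license="mit",
    model_name="my-cool-model",
    eval_results=[
        EvalResult(
            task_type="image-classification",
            dataset_type="beans",
            dataset_name="Beans",
            metric_type="accuracy",
            metric_value=0.7,
        )
    ],
)
```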
```diff
@@ -359,15 +376,18 @@ class ModelCardData(CardData):
         super().__init__(**kwargs)
 
         if self.eval_results:
-            if isinstance(self.eval_results, EvalResult):
-                self.eval_results = [self.eval_results]
-            if self.model_name is None:
-                raise ValueError("Passing `eval_results` requires `model_name` to be set.")
+            try:
+                self.eval_results = _validate_eval_results(self.eval_results, self.model_name)
+            except Exception as e:
+                if ignore_metadata_errors:
+                    logger.warning(f"Failed to validate eval_results: {e}. Not loading eval results into CardData.")
+                else:
+                    raise ValueError(f"Failed to validate eval_results: {e}") from e
 
     def _to_dict(self, data_dict):
         """Format the internal data dict. In this case, we convert eval results to a valid model index"""
         if self.eval_results is not None:
-            data_dict["model-index"] = eval_results_to_model_index(self.model_name, self.eval_results)
+            data_dict["model-index"] = eval_results_to_model_index(self.model_name, self.eval_results)  # type: ignore
             del data_dict["eval_results"], data_dict["model_name"]
 
 
```
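A sketch of the error handling introduced above, assuming the control flow shown in the hunk: a failed validation raises a wrapped `ValueError`, unless `ignore_metadata_errors=True`, in which case the failure is only logged as a warning.

```python
from huggingface_hub import EvalResult, ModelCardData

result = EvalResult(
    task_type="image-classification",
    dataset_type="beans",
    dataset_name="Beans",
    metric_type="accuracy",
    metric_value=0.7,
)

try:
    ModelCardData(eval_results=[result])  # no model_name -> invalid
except ValueError as e:
    print(e)  # Failed to validate eval_results: Passing `eval_results` requires `model_name` to be set.

# Same input, but the failure is downgraded to a logged warning:
ModelCardData(eval_results=[result], ignore_metadata_errors=True)
```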
```diff
@@ -375,58 +395,58 @@ class DatasetCardData(CardData):
     """Dataset Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
 
     Args:
-        language (`List[str]`, *optional*):
+        language (`list[str]`, *optional*):
             Language of dataset's data or metadata. It must be an ISO 639-1, 639-2 or
             639-3 code (two/three letters), or a special value like "code", "multilingual".
-        license (`Union[str, List[str]]`, *optional*):
+        license (`Union[str, list[str]]`, *optional*):
             License(s) of this dataset. Example: apache-2.0 or any license from
             https://huggingface.co/docs/hub/repositories-licenses.
-        annotations_creators (`Union[str, List[str]]`, *optional*):
+        annotations_creators (`Union[str, list[str]]`, *optional*):
             How the annotations for the dataset were created.
             Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'no-annotation', 'other'.
-        language_creators (`Union[str, List[str]]`, *optional*):
+        language_creators (`Union[str, list[str]]`, *optional*):
             How the text-based data in the dataset was created.
             Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'other'
-        multilinguality (`Union[str, List[str]]`, *optional*):
+        multilinguality (`Union[str, list[str]]`, *optional*):
             Whether the dataset is multilingual.
             Options are: 'monolingual', 'multilingual', 'translation', 'other'.
-        size_categories (`Union[str, List[str]]`, *optional*):
+        size_categories (`Union[str, list[str]]`, *optional*):
             The number of examples in the dataset. Options are: 'n<1K', '1K<n<10K', '10K<n<100K',
             '100K<n<1M', '1M<n<10M', '10M<n<100M', '100M<n<1B', '1B<n<10B', '10B<n<100B', '100B<n<1T', 'n>1T', and 'other'.
-        source_datasets (`List[str]]`, *optional*):
+        source_datasets (`list[str]]`, *optional*):
             Indicates whether the dataset is an original dataset or extended from another existing dataset.
             Options are: 'original' and 'extended'.
-        task_categories (`Union[str, List[str]]`, *optional*):
+        task_categories (`Union[str, list[str]]`, *optional*):
             What categories of task does the dataset support?
-        task_ids (`Union[str, List[str]]`, *optional*):
+        task_ids (`Union[str, list[str]]`, *optional*):
             What specific tasks does the dataset support?
         paperswithcode_id (`str`, *optional*):
             ID of the dataset on PapersWithCode.
         pretty_name (`str`, *optional*):
             A more human-readable name for the dataset. (ex. "Cats vs. Dogs")
-        train_eval_index (`Dict`, *optional*):
+        train_eval_index (`dict`, *optional*):
             A dictionary that describes the necessary spec for doing evaluation on the Hub.
             If not provided, it will be gathered from the 'train-eval-index' key of the kwargs.
-        config_names (`Union[str, List[str]]`, *optional*):
+        config_names (`Union[str, list[str]]`, *optional*):
             A list of the available dataset configs for the dataset.
     """
 
     def __init__(
         self,
         *,
-        language: Optional[Union[str, List[str]]] = None,
-        license: Optional[Union[str, List[str]]] = None,
-        annotations_creators: Optional[Union[str, List[str]]] = None,
-        language_creators: Optional[Union[str, List[str]]] = None,
-        multilinguality: Optional[Union[str, List[str]]] = None,
-        size_categories: Optional[Union[str, List[str]]] = None,
-        source_datasets: Optional[List[str]] = None,
-        task_categories: Optional[Union[str, List[str]]] = None,
-        task_ids: Optional[Union[str, List[str]]] = None,
+        language: Optional[Union[str, list[str]]] = None,
+        license: Optional[Union[str, list[str]]] = None,
+        annotations_creators: Optional[Union[str, list[str]]] = None,
+        language_creators: Optional[Union[str, list[str]]] = None,
+        multilinguality: Optional[Union[str, list[str]]] = None,
+        size_categories: Optional[Union[str, list[str]]] = None,
+        source_datasets: Optional[list[str]] = None,
+        task_categories: Optional[Union[str, list[str]]] = None,
+        task_ids: Optional[Union[str, list[str]]] = None,
         paperswithcode_id: Optional[str] = None,
         pretty_name: Optional[str] = None,
-        train_eval_index: Optional[Dict] = None,
-        config_names: Optional[Union[str, List[str]]] = None,
+        train_eval_index: Optional[dict] = None,
+        config_names: Optional[Union[str, list[str]]] = None,
         ignore_metadata_errors: bool = False,
         **kwargs,
     ):
```
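`DatasetCardData` gets the same annotation-only treatment. An illustrative construction using options documented above:

```python
from huggingface_hub import DatasetCardData

card_data = DatasetCardData(
    language="en",
    license="mit",
    annotations_creators="crowdsourced",
    task_categories=["text-classification"],
    task_ids=["natural-language-inference"],
    size_categories="100K<n<1M",
    pretty_name="My Cool Dataset",
)
```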
```diff
@@ -475,11 +495,11 @@ class SpaceCardData(CardData):
             https://huggingface.co/docs/hub/repositories-licenses.
         duplicated_from (`str`, *optional*)
             ID of the original Space if this is a duplicated Space.
-        models (List[`str`], *optional*)
+        models (list[`str`], *optional*)
             List of models related to this Space. Should be a dataset ID found on https://hf.co/models.
-        datasets (`List[str]`, *optional*)
+        datasets (`list[str]`, *optional*)
             List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets.
-        tags (`List[str]`, *optional*)
+        tags (`list[str]`, *optional*)
             List of tags to add to your Space that can be used when filtering on the Hub.
         ignore_metadata_errors (`str`):
             If True, errors while parsing the metadata section will be ignored. Some information might be lost during
@@ -512,9 +532,9 @@ class SpaceCardData(CardData):
         app_port: Optional[int] = None,
         license: Optional[str] = None,
         duplicated_from: Optional[str] = None,
-        models: Optional[List[str]] = None,
-        datasets: Optional[List[str]] = None,
-        tags: Optional[List[str]] = None,
+        models: Optional[list[str]] = None,
+        datasets: Optional[list[str]] = None,
+        tags: Optional[list[str]] = None,
         ignore_metadata_errors: bool = False,
         **kwargs,
     ):
```
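Same pattern for `SpaceCardData`; a short sketch with hypothetical field values:

```python
from huggingface_hub import SpaceCardData

card_data = SpaceCardData(
    title="Dreambooth Training",
    license="mit",
    sdk="gradio",
    models=["stabilityai/stable-diffusion-2"],  # now typed as list[str]
)
```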
```diff
@@ -532,14 +552,14 @@ class SpaceCardData(CardData):
         super().__init__(**kwargs)
 
 
-def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, List[EvalResult]]:
+def model_index_to_eval_results(model_index: list[dict[str, Any]]) -> tuple[str, list[EvalResult]]:
     """Takes in a model index and returns the model name and a list of `huggingface_hub.EvalResult` objects.
 
     A detailed spec of the model index can be found here:
     https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
 
     Args:
-        model_index (`List[Dict[str, Any]]`):
+        model_index (`list[dict[str, Any]]`):
             A model index data structure, likely coming from a README.md file on the
             Hugging Face Hub.
 
@@ -547,7 +567,7 @@ def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, List[EvalResult]]:
         model_name (`str`):
             The name of the model as found in the model index. This is used as the
             identifier for the model on leaderboards like PapersWithCode.
-        eval_results (`List[EvalResult]`):
+        eval_results (`list[EvalResult]`):
             A list of `huggingface_hub.EvalResult` objects containing the metrics
             reported in the provided model_index.
 
```
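A sketch of the documented input shape: a minimal model-index, as it would appear in a README.md header, parsed back into `EvalResult` objects (values are illustrative):

```python
from huggingface_hub.repocard_data import model_index_to_eval_results

model_index = [
    {
        "name": "my-cool-model",
        "results": [
            {
                "task": {"type": "image-classification"},
                "dataset": {"type": "beans", "name": "Beans"},
                "metrics": [{"type": "accuracy", "value": 0.9}],
            }
        ],
    }
]
model_name, eval_results = model_index_to_eval_results(model_index)
print(model_name)                    # my-cool-model
print(eval_results[0].metric_value)  # 0.9
```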
```diff
@@ -648,7 +668,7 @@ def _remove_none(obj):
     return obj
 
 
-def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
+def eval_results_to_model_index(model_name: str, eval_results: list[EvalResult]) -> list[dict[str, Any]]:
     """Takes in given model name and list of `huggingface_hub.EvalResult` and returns a
     valid model-index that will be compatible with the format expected by the
     Hugging Face Hub.
@@ -657,12 +677,12 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
         model_name (`str`):
             Name of the model (ex. "my-cool-model"). This is used as the identifier
             for the model on leaderboards like PapersWithCode.
-        eval_results (`List[EvalResult]`):
+        eval_results (`list[EvalResult]`):
             List of `huggingface_hub.EvalResult` objects containing the metrics to be
             reported in the model-index.
 
     Returns:
-        model_index (`List[Dict[str, Any]]`): The eval_results converted to a model-index.
+        model_index (`list[dict[str, Any]]`): The eval_results converted to a model-index.
 
     Example:
         ```python
@@ -685,7 +705,7 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
 
     # Metrics are reported on a unique task-and-dataset basis.
     # Here, we make a map of those pairs and the associated EvalResults.
-    task_and_ds_types_map: Dict[Any, List[EvalResult]] = defaultdict(list)
+    task_and_ds_types_map: dict[Any, list[EvalResult]] = defaultdict(list)
     for eval_result in eval_results:
         task_and_ds_types_map[eval_result.unique_identifier].append(eval_result)
 
```
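And the reverse direction, a sketch based on the function's own docstring example:

```python
from huggingface_hub import EvalResult
from huggingface_hub.repocard_data import eval_results_to_model_index

eval_results = [
    EvalResult(
        task_type="image-classification",
        dataset_type="beans",
        dataset_name="Beans",
        metric_type="accuracy",
        metric_value=0.9,
    )
]
model_index = eval_results_to_model_index("my-cool-model", eval_results)
print(model_index[0]["name"])                   # my-cool-model
print(model_index[0]["results"][0]["metrics"])  # [{'type': 'accuracy', 'value': 0.9}]
```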
```diff
@@ -740,7 +760,7 @@ def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
     return _remove_none(model_index)
 
 
-def _to_unique_list(tags: Optional[List[str]]) -> Optional[List[str]]:
+def _to_unique_list(tags: Optional[list[str]]) -> Optional[list[str]]:
     if tags is None:
         return tags
     unique_tags = []  # make tags unique + keep order explicitly
```
huggingface_hub/serialization/__init__.py
CHANGED

```diff
@@ -15,7 +15,6 @@
 """Contains helpers to serialize tensors."""
 
 from ._base import StateDictSplit, split_state_dict_into_shards_factory
-from ._tensorflow import get_tf_storage_size, split_tf_state_dict_into_shards
 from ._torch import (
     get_torch_storage_id,
     get_torch_storage_size,
```
huggingface_hub/serialization/_base.py
CHANGED

```diff
@@ -14,7 +14,7 @@
 """Contains helpers to split tensors into shards."""
 
 from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List, Optional, TypeVar, Union
+from typing import Any, Callable, Optional, TypeVar, Union
 
 from .. import logging
 
@@ -38,16 +38,16 @@ logger = logging.get_logger(__file__)
 @dataclass
 class StateDictSplit:
     is_sharded: bool = field(init=False)
-    metadata: Dict[str, Any]
-    filename_to_tensors: Dict[str, List[str]]
-    tensor_to_filename: Dict[str, str]
+    metadata: dict[str, Any]
+    filename_to_tensors: dict[str, list[str]]
+    tensor_to_filename: dict[str, str]
 
     def __post_init__(self):
         self.is_sharded = len(self.filename_to_tensors) > 1
 
 
 def split_state_dict_into_shards_factory(
-    state_dict: Dict[str, TensorT],
+    state_dict: dict[str, TensorT],
     *,
     get_storage_size: TensorSizeFn_T,
     filename_pattern: str,
```
```diff
@@ -62,15 +62,12 @@ def split_state_dict_into_shards_factory(
     have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
     [6+2+2GB], [6+2GB], [6GB].
 
-    <Tip warning={true}>
-
-    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
-    size greater than `max_shard_size`.
-
-    </Tip>
+    > [!WARNING]
+    > If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    > size greater than `max_shard_size`.
 
     Args:
-        state_dict (`Dict[str, Tensor]`):
+        state_dict (`dict[str, Tensor]`):
             The state dictionary to save.
         get_storage_size (`Callable[[Tensor], int]`):
             A function that returns the size of a tensor when saved on disk in bytes.
@@ -87,10 +84,10 @@ def split_state_dict_into_shards_factory(
     Returns:
         [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
     """
-    storage_id_to_tensors: Dict[Any, List[str]] = {}
+    storage_id_to_tensors: dict[Any, list[str]] = {}
 
-    shard_list: List[Dict[str, TensorT]] = []
-    current_shard: Dict[str, TensorT] = {}
+    shard_list: list[dict[str, TensorT]] = []
+    current_shard: dict[str, TensorT] = {}
     current_shard_size = 0
     total_size = 0
 
```
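A runnable sketch of the factory with toy byte-buffer "tensors": `get_storage_size=len` stands in for a real tensor-size function, and the `filename_pattern` / `max_shard_size` values are illustrative.

```python
from huggingface_hub.serialization import split_state_dict_into_shards_factory

# Toy "tensors": raw byte buffers whose on-disk size is simply their length.
state_dict = {f"layer_{i}.weight": bytes(1024) for i in range(10)}

split = split_state_dict_into_shards_factory(
    state_dict,
    get_storage_size=len,                  # size on disk, in bytes
    filename_pattern="model{suffix}.bin",  # "{suffix}" becomes e.g. "-00001-of-00003"
    max_shard_size=4096,                   # small on purpose, to force sharding
)
print(split.is_sharded)                 # True
print(list(split.filename_to_tensors))  # three shard filenames for the 10 tensors
```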
huggingface_hub/serialization/_dduf.py
CHANGED

```diff
@@ -7,7 +7,7 @@ import zipfile
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Dict, Generator, Iterable, Tuple, Union
+from typing import Any, Generator, Iterable, Union
 
 from ..errors import DDUFCorruptedFileError, DDUFExportError, DDUFInvalidEntryNameError
 
@@ -87,7 +87,7 @@ class DDUFEntry:
             return f.read(self.length).decode(encoding=encoding)
 
 
-def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
+def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> dict[str, DDUFEntry]:
     """
     Read a DDUF file and return a dictionary of entries.
 
@@ -98,7 +98,7 @@ def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
             The path to the DDUF file to read.
 
     Returns:
-        `Dict[str, DDUFEntry]`:
+        `dict[str, DDUFEntry]`:
             A dictionary of [`DDUFEntry`] indexed by filename.
 
     Raises:
```
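The `read_dduf_file` change is annotation-only; its documented usage, for reference (the file name is illustrative):

```python
from huggingface_hub import read_dduf_file

# Returns a dict of DDUFEntry indexed by filename; entry contents are read lazily.
dduf_entries = read_dduf_file("FLUX.1-dev.dduf")
print(dduf_entries["model_index.json"].read_text())
```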
```diff
@@ -157,7 +157,7 @@ def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
 
 
 def export_entries_as_dduf(
-    dduf_path: Union[str, os.PathLike], entries: Iterable[Tuple[str, Union[str, Path, bytes]]]
+    dduf_path: Union[str, os.PathLike], entries: Iterable[tuple[str, Union[str, Path, bytes]]]
 ) -> None:
     """Write a DDUF file from an iterable of entries.
 
@@ -167,7 +167,7 @@ def export_entries_as_dduf(
     Args:
         dduf_path (`str` or `os.PathLike`):
             The path to the DDUF file to write.
-        entries (`Iterable[Tuple[str, Union[str, Path, bytes]]]`):
+        entries (`Iterable[tuple[str, Union[str, Path, bytes]]]`):
             An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
             The filename should be the path to the file in the DDUF archive.
             The content can be a string or a pathlib.Path representing a path to a file on the local disk or directly the content as bytes.
@@ -201,8 +201,8 @@ def export_entries_as_dduf(
     >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
     ... # ... do some work with the pipeline
 
-    >>> def as_entries(pipe: DiffusionPipeline) -> Generator[Tuple[str, bytes], None, None]:
-    ...     # Build a generator that yields the entries to add to the DDUF file.
+    >>> def as_entries(pipe: DiffusionPipeline) -> Generator[tuple[str, bytes], None, None]:
+    ...     # Build a generator that yields the entries to add to the DDUF file.
     ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
     ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
     ...     yield "vae/config.json", pipe.vae.to_json_string().encode()
```
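A complementary end-to-end sketch. Assumptions: the DDUF format validates archives on export, restricting entry extensions and expecting a root `model_index.json`, so the example keeps to a single minimal JSON entry.

```python
from huggingface_hub import export_entries_as_dduf, read_dduf_file

# Hypothetical minimal archive: one JSON entry at the root.
export_entries_as_dduf("my-pipeline.dduf", entries=[("model_index.json", b"{}")])
print(list(read_dduf_file("my-pipeline.dduf")))  # ['model_index.json']
```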
```diff
@@ -267,7 +267,7 @@ def export_folder_as_dduf(dduf_path: Union[str, os.PathLike], folder_path: Union[str, os.PathLike]) -> None:
     """
     folder_path = Path(folder_path)
 
-    def _iterate_over_folder() -> Iterable[Tuple[str, Path]]:
+    def _iterate_over_folder() -> Iterable[tuple[str, Path]]:
         for path in Path(folder_path).glob("**/*"):
             if not path.is_file():
                 continue
```