huggingface-hub 0.12.1__py3-none-any.whl → 0.13.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +165 -127
- huggingface_hub/_commit_api.py +25 -51
- huggingface_hub/_login.py +4 -13
- huggingface_hub/_snapshot_download.py +45 -23
- huggingface_hub/_space_api.py +7 -0
- huggingface_hub/commands/delete_cache.py +13 -39
- huggingface_hub/commands/env.py +1 -3
- huggingface_hub/commands/huggingface_cli.py +1 -3
- huggingface_hub/commands/lfs.py +4 -8
- huggingface_hub/commands/scan_cache.py +5 -16
- huggingface_hub/commands/user.py +27 -45
- huggingface_hub/community.py +4 -4
- huggingface_hub/constants.py +22 -19
- huggingface_hub/fastai_utils.py +14 -23
- huggingface_hub/file_download.py +166 -108
- huggingface_hub/hf_api.py +500 -255
- huggingface_hub/hub_mixin.py +181 -176
- huggingface_hub/inference_api.py +4 -10
- huggingface_hub/keras_mixin.py +39 -71
- huggingface_hub/lfs.py +8 -24
- huggingface_hub/repocard.py +33 -48
- huggingface_hub/repocard_data.py +141 -30
- huggingface_hub/repository.py +41 -112
- huggingface_hub/templates/modelcard_template.md +39 -34
- huggingface_hub/utils/__init__.py +1 -0
- huggingface_hub/utils/_cache_assets.py +1 -4
- huggingface_hub/utils/_cache_manager.py +17 -39
- huggingface_hub/utils/_deprecation.py +8 -12
- huggingface_hub/utils/_errors.py +10 -57
- huggingface_hub/utils/_fixes.py +2 -6
- huggingface_hub/utils/_git_credential.py +5 -16
- huggingface_hub/utils/_headers.py +22 -11
- huggingface_hub/utils/_http.py +1 -4
- huggingface_hub/utils/_paths.py +5 -12
- huggingface_hub/utils/_runtime.py +2 -1
- huggingface_hub/utils/_telemetry.py +120 -0
- huggingface_hub/utils/_validators.py +5 -13
- huggingface_hub/utils/endpoint_helpers.py +1 -3
- huggingface_hub/utils/logging.py +10 -8
- {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/METADATA +7 -14
- huggingface_hub-0.13.0rc0.dist-info/RECORD +56 -0
- huggingface_hub/py.typed +0 -0
- huggingface_hub-0.12.1.dist-info/RECORD +0 -56
- {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/keras_mixin.py
CHANGED
|
@@ -5,10 +5,8 @@ import warnings
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from shutil import copytree
|
|
7
7
|
from typing import Any, Dict, List, Optional, Union
|
|
8
|
-
from urllib.parse import quote
|
|
9
8
|
|
|
10
|
-
from huggingface_hub import
|
|
11
|
-
from huggingface_hub._commit_api import CommitOperation
|
|
9
|
+
from huggingface_hub import ModelHubMixin, snapshot_download
|
|
12
10
|
from huggingface_hub.utils import (
|
|
13
11
|
get_tf_version,
|
|
14
12
|
is_graphviz_available,
|
|
@@ -17,8 +15,8 @@ from huggingface_hub.utils import (
|
|
|
17
15
|
yaml_dump,
|
|
18
16
|
)
|
|
19
17
|
|
|
20
|
-
from .constants import CONFIG_NAME
|
|
21
|
-
from .hf_api import HfApi
|
|
18
|
+
from .constants import CONFIG_NAME
|
|
19
|
+
from .hf_api import HfApi
|
|
22
20
|
from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
|
|
23
21
|
|
|
24
22
|
|
|
@@ -36,7 +34,7 @@ def _flatten_dict(dictionary, parent_key=""):
|
|
|
36
34
|
dictionary (`dict`):
|
|
37
35
|
The nested dictionary to be flattened.
|
|
38
36
|
parent_key (`str`):
|
|
39
|
-
The parent key to be prefixed to the
|
|
37
|
+
The parent key to be prefixed to the children keys.
|
|
40
38
|
Necessary for recursing over the nested dictionary.
|
|
41
39
|
|
|
42
40
|
Returns:
|
|
@@ -63,9 +61,7 @@ def _create_hyperparameter_table(model):
|
|
|
63
61
|
optimizer_params = model.optimizer.get_config()
|
|
64
62
|
# flatten the configuration
|
|
65
63
|
optimizer_params = _flatten_dict(optimizer_params)
|
|
66
|
-
optimizer_params[
|
|
67
|
-
"training_precision"
|
|
68
|
-
] = tf.keras.mixed_precision.global_policy().name
|
|
64
|
+
optimizer_params["training_precision"] = tf.keras.mixed_precision.global_policy().name
|
|
69
65
|
table = "| Hyperparameters | Value |\n| :-- | :-- |\n"
|
|
70
66
|
for key, value in optimizer_params.items():
|
|
71
67
|
table += f"| {key} | {value} |\n"
|
|
@@ -170,9 +166,7 @@ def save_pretrained_keras(
|
|
|
170
166
|
if is_tf_available():
|
|
171
167
|
import tensorflow as tf
|
|
172
168
|
else:
|
|
173
|
-
raise ImportError(
|
|
174
|
-
"Called a Tensorflow-specific function but could not import it."
|
|
175
|
-
)
|
|
169
|
+
raise ImportError("Called a Tensorflow-specific function but could not import it.")
|
|
176
170
|
|
|
177
171
|
if not model.built:
|
|
178
172
|
raise ValueError("Model should be built before trying to save")
|
|
@@ -183,10 +177,7 @@ def save_pretrained_keras(
|
|
|
183
177
|
# saving config
|
|
184
178
|
if config:
|
|
185
179
|
if not isinstance(config, dict):
|
|
186
|
-
raise RuntimeError(
|
|
187
|
-
"Provided config to save_pretrained_keras should be a dict. Got:"
|
|
188
|
-
f" '{type(config)}'"
|
|
189
|
-
)
|
|
180
|
+
raise RuntimeError(f"Provided config to save_pretrained_keras should be a dict. Got: '{type(config)}'")
|
|
190
181
|
|
|
191
182
|
with (save_directory / CONFIG_NAME).open("w") as f:
|
|
192
183
|
json.dump(config, f)
|
|
@@ -213,25 +204,22 @@ def save_pretrained_keras(
|
|
|
213
204
|
path = save_directory / "history.json"
|
|
214
205
|
if path.exists():
|
|
215
206
|
warnings.warn(
|
|
216
|
-
"`history.json` file already exists, it will be overwritten by the"
|
|
217
|
-
" history of this version.",
|
|
207
|
+
"`history.json` file already exists, it will be overwritten by the history of this version.",
|
|
218
208
|
UserWarning,
|
|
219
209
|
)
|
|
220
210
|
with path.open("w", encoding="utf-8") as f:
|
|
221
211
|
json.dump(model.history.history, f, indent=2, sort_keys=True)
|
|
222
212
|
|
|
223
213
|
_create_model_card(model, save_directory, plot_model, metadata)
|
|
224
|
-
tf.keras.models.save_model(
|
|
225
|
-
model, save_directory, include_optimizer=include_optimizer, **model_save_kwargs
|
|
226
|
-
)
|
|
214
|
+
tf.keras.models.save_model(model, save_directory, include_optimizer=include_optimizer, **model_save_kwargs)
|
|
227
215
|
|
|
228
216
|
|
|
229
|
-
def from_pretrained_keras(*args, **kwargs):
|
|
217
|
+
def from_pretrained_keras(*args, **kwargs) -> "KerasModelHubMixin":
|
|
230
218
|
r"""
|
|
231
219
|
Instantiate a pretrained Keras model from a pre-trained model from the Hub.
|
|
232
220
|
The model is expected to be in `SavedModel` format.
|
|
233
221
|
|
|
234
|
-
|
|
222
|
+
Args:
|
|
235
223
|
pretrained_model_name_or_path (`str` or `os.PathLike`):
|
|
236
224
|
Can be either:
|
|
237
225
|
- A string, the `model id` of a pretrained model hosted inside a
|
|
@@ -299,6 +287,7 @@ def push_to_hub_keras(
|
|
|
299
287
|
create_pr: Optional[bool] = None,
|
|
300
288
|
allow_patterns: Optional[Union[List[str], str]] = None,
|
|
301
289
|
ignore_patterns: Optional[Union[List[str], str]] = None,
|
|
290
|
+
delete_patterns: Optional[Union[List[str], str]] = None,
|
|
302
291
|
log_dir: Optional[str] = None,
|
|
303
292
|
include_optimizer: bool = False,
|
|
304
293
|
tags: Optional[Union[list, str]] = None,
|
|
@@ -306,17 +295,16 @@ def push_to_hub_keras(
|
|
|
306
295
|
**model_save_kwargs,
|
|
307
296
|
):
|
|
308
297
|
"""
|
|
309
|
-
Upload model checkpoint
|
|
310
|
-
local clone of the repo in `repo_path_or_name`.
|
|
298
|
+
Upload model checkpoint to the Hub.
|
|
311
299
|
|
|
312
|
-
Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be
|
|
313
|
-
|
|
300
|
+
Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
|
|
301
|
+
`delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
|
|
302
|
+
details.
|
|
314
303
|
|
|
315
|
-
|
|
304
|
+
Args:
|
|
316
305
|
model (`Keras.Model`):
|
|
317
|
-
The [Keras
|
|
318
|
-
model
|
|
319
|
-
you'd like to push to the Hub. The model must be compiled and built.
|
|
306
|
+
The [Keras model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) you'd like to push to the
|
|
307
|
+
Hub. The model must be compiled and built.
|
|
320
308
|
repo_id (`str`):
|
|
321
309
|
Name of the repository to push to.
|
|
322
310
|
commit_message (`str`, *optional*, defaults to "Add Keras model"):
|
|
@@ -342,6 +330,8 @@ def push_to_hub_keras(
|
|
|
342
330
|
If provided, only files matching at least one pattern are pushed.
|
|
343
331
|
ignore_patterns (`List[str]` or `str`, *optional*):
|
|
344
332
|
If provided, files matching any of the patterns are not pushed.
|
|
333
|
+
delete_patterns (`List[str]` or `str`, *optional*):
|
|
334
|
+
If provided, remote files matching any of the patterns will be deleted from the repo.
|
|
345
335
|
log_dir (`str`, *optional*):
|
|
346
336
|
TensorBoard logging directory to be pushed. The Hub automatically
|
|
347
337
|
hosts and displays a TensorBoard instance if log files are included
|
|
@@ -362,13 +352,7 @@ def push_to_hub_keras(
|
|
|
362
352
|
The url of the commit of your model in the given repository.
|
|
363
353
|
"""
|
|
364
354
|
api = HfApi(endpoint=api_endpoint)
|
|
365
|
-
api.create_repo(
|
|
366
|
-
repo_id=repo_id,
|
|
367
|
-
repo_type="model",
|
|
368
|
-
token=token,
|
|
369
|
-
private=private,
|
|
370
|
-
exist_ok=True,
|
|
371
|
-
)
|
|
355
|
+
repo_id = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True).repo_id
|
|
372
356
|
|
|
373
357
|
# Push the files to the repo in a single commit
|
|
374
358
|
with SoftTemporaryDirectory() as tmp:
|
|
@@ -383,46 +367,32 @@ def push_to_hub_keras(
|
|
|
383
367
|
**model_save_kwargs,
|
|
384
368
|
)
|
|
385
369
|
|
|
386
|
-
# If
|
|
387
|
-
operations: List[CommitOperation] = []
|
|
370
|
+
# If `log_dir` provided, delete remote logs and upload new ones
|
|
388
371
|
if log_dir is not None:
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
372
|
+
delete_patterns = (
|
|
373
|
+
[]
|
|
374
|
+
if delete_patterns is None
|
|
375
|
+
else (
|
|
376
|
+
[delete_patterns] # convert `delete_patterns` to a list
|
|
377
|
+
if isinstance(delete_patterns, str)
|
|
378
|
+
else delete_patterns
|
|
379
|
+
)
|
|
380
|
+
)
|
|
381
|
+
delete_patterns.append("logs/*")
|
|
397
382
|
copytree(log_dir, saved_path / "logs")
|
|
398
383
|
|
|
399
|
-
|
|
400
|
-
# duplicate code from `upload_folder`. We are not directly using
|
|
401
|
-
# `upload_folder` since we want to add delete operations to the
|
|
402
|
-
# commit as well.
|
|
403
|
-
operations += _prepare_upload_folder_commit(
|
|
404
|
-
saved_path,
|
|
405
|
-
path_in_repo="",
|
|
406
|
-
allow_patterns=allow_patterns,
|
|
407
|
-
ignore_patterns=ignore_patterns,
|
|
408
|
-
)
|
|
409
|
-
commit_info = api.create_commit(
|
|
384
|
+
return api.upload_folder(
|
|
410
385
|
repo_type="model",
|
|
411
386
|
repo_id=repo_id,
|
|
412
|
-
|
|
387
|
+
folder_path=saved_path,
|
|
413
388
|
commit_message=commit_message,
|
|
414
389
|
token=token,
|
|
415
390
|
revision=branch,
|
|
416
391
|
create_pr=create_pr,
|
|
392
|
+
allow_patterns=allow_patterns,
|
|
393
|
+
ignore_patterns=ignore_patterns,
|
|
394
|
+
delete_patterns=delete_patterns,
|
|
417
395
|
)
|
|
418
|
-
revision = branch
|
|
419
|
-
if revision is None:
|
|
420
|
-
revision = (
|
|
421
|
-
quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="")
|
|
422
|
-
if commit_info.pr_url is not None
|
|
423
|
-
else DEFAULT_REVISION
|
|
424
|
-
)
|
|
425
|
-
return f"{api.endpoint}/{repo_id}/tree/{revision}/"
|
|
426
396
|
|
|
427
397
|
|
|
428
398
|
class KerasModelHubMixin(ModelHubMixin):
|
|
@@ -486,9 +456,7 @@ class KerasModelHubMixin(ModelHubMixin):
|
|
|
486
456
|
if is_tf_available():
|
|
487
457
|
import tensorflow as tf
|
|
488
458
|
else:
|
|
489
|
-
raise ImportError(
|
|
490
|
-
"Called a TensorFlow-specific function but could not import it."
|
|
491
|
-
)
|
|
459
|
+
raise ImportError("Called a TensorFlow-specific function but could not import it.")
|
|
492
460
|
|
|
493
461
|
# TODO - Figure out what to do about these config values. Config is not going to be needed to load model
|
|
494
462
|
cfg = model_kwargs.pop("config", None)
|
huggingface_hub/lfs.py
CHANGED
|
@@ -23,9 +23,10 @@ from os.path import getsize
|
|
|
23
23
|
from typing import BinaryIO, Iterable, List, Optional, Tuple
|
|
24
24
|
|
|
25
25
|
import requests
|
|
26
|
-
from huggingface_hub.constants import ENDPOINT, REPO_TYPES_URL_PREFIXES
|
|
27
26
|
from requests.auth import HTTPBasicAuth
|
|
28
27
|
|
|
28
|
+
from huggingface_hub.constants import ENDPOINT, REPO_TYPES_URL_PREFIXES
|
|
29
|
+
|
|
29
30
|
from .utils import (
|
|
30
31
|
get_token_to_send,
|
|
31
32
|
hf_raise_for_status,
|
|
@@ -92,10 +93,7 @@ def _validate_lfs_action(lfs_action: dict):
|
|
|
92
93
|
"""validates response from the LFS batch endpoint"""
|
|
93
94
|
if not (
|
|
94
95
|
isinstance(lfs_action.get("href"), str)
|
|
95
|
-
and (
|
|
96
|
-
lfs_action.get("header") is None
|
|
97
|
-
or isinstance(lfs_action.get("header"), dict)
|
|
98
|
-
)
|
|
96
|
+
and (lfs_action.get("header") is None or isinstance(lfs_action.get("header"), dict))
|
|
99
97
|
):
|
|
100
98
|
raise ValueError("lfs_action is improperly formatted")
|
|
101
99
|
return lfs_action
|
|
@@ -103,10 +101,7 @@ def _validate_lfs_action(lfs_action: dict):
|
|
|
103
101
|
|
|
104
102
|
def _validate_batch_actions(lfs_batch_actions: dict):
|
|
105
103
|
"""validates response from the LFS batch endpoint"""
|
|
106
|
-
if not (
|
|
107
|
-
isinstance(lfs_batch_actions.get("oid"), str)
|
|
108
|
-
and isinstance(lfs_batch_actions.get("size"), int)
|
|
109
|
-
):
|
|
104
|
+
if not (isinstance(lfs_batch_actions.get("oid"), str) and isinstance(lfs_batch_actions.get("size"), int)):
|
|
110
105
|
raise ValueError("lfs_batch_actions is improperly formatted")
|
|
111
106
|
|
|
112
107
|
upload_action = lfs_batch_actions.get("actions", {}).get("upload")
|
|
@@ -120,10 +115,7 @@ def _validate_batch_actions(lfs_batch_actions: dict):
|
|
|
120
115
|
|
|
121
116
|
def _validate_batch_error(lfs_batch_error: dict):
|
|
122
117
|
"""validates response from the LFS batch endpoint"""
|
|
123
|
-
if not (
|
|
124
|
-
isinstance(lfs_batch_error.get("oid"), str)
|
|
125
|
-
and isinstance(lfs_batch_error.get("size"), int)
|
|
126
|
-
):
|
|
118
|
+
if not (isinstance(lfs_batch_error.get("oid"), str) and isinstance(lfs_batch_error.get("size"), int)):
|
|
127
119
|
raise ValueError("lfs_batch_error is improperly formatted")
|
|
128
120
|
error_info = lfs_batch_error.get("error")
|
|
129
121
|
if not (
|
|
@@ -258,10 +250,7 @@ def lfs_upload(
|
|
|
258
250
|
if isinstance(chunk_size, str):
|
|
259
251
|
chunk_size = int(chunk_size, 10)
|
|
260
252
|
else:
|
|
261
|
-
raise ValueError(
|
|
262
|
-
"Malformed response from LFS batch endpoint: `chunk_size`"
|
|
263
|
-
" should be a string"
|
|
264
|
-
)
|
|
253
|
+
raise ValueError("Malformed response from LFS batch endpoint: `chunk_size` should be a string")
|
|
265
254
|
_upload_multi_part(
|
|
266
255
|
completion_url=upload_action["href"],
|
|
267
256
|
fileobj=fileobj,
|
|
@@ -385,10 +374,7 @@ def _upload_multi_part(
|
|
|
385
374
|
hf_raise_for_status(part_upload_res)
|
|
386
375
|
etag = part_upload_res.headers.get("etag")
|
|
387
376
|
if etag is None or etag == "":
|
|
388
|
-
raise ValueError(
|
|
389
|
-
f"Invalid etag (`{etag}`) returned for part {part_idx +1} of"
|
|
390
|
-
f" {num_parts}"
|
|
391
|
-
)
|
|
377
|
+
raise ValueError(f"Invalid etag (`{etag}`) returned for part {part_idx +1} of {num_parts}")
|
|
392
378
|
completion_payload["parts"][part_idx]["etag"] = etag
|
|
393
379
|
|
|
394
380
|
completion_res = requests.post(
|
|
@@ -466,9 +452,7 @@ class SliceFileObj(AbstractContextManager):
|
|
|
466
452
|
if pos >= self._len:
|
|
467
453
|
return b""
|
|
468
454
|
remaining_amount = self._len - pos
|
|
469
|
-
data = self.fileobj.read(
|
|
470
|
-
remaining_amount if n < 0 else min(n, remaining_amount)
|
|
471
|
-
)
|
|
455
|
+
data = self.fileobj.read(remaining_amount if n < 0 else min(n, remaining_amount))
|
|
472
456
|
return data
|
|
473
457
|
|
|
474
458
|
def tell(self) -> int:
|
huggingface_hub/repocard.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Any, Dict, Optional, Type, Union
|
|
|
5
5
|
|
|
6
6
|
import requests
|
|
7
7
|
import yaml
|
|
8
|
+
|
|
8
9
|
from huggingface_hub.file_download import hf_hub_download
|
|
9
10
|
from huggingface_hub.hf_api import upload_file
|
|
10
11
|
from huggingface_hub.repocard_data import (
|
|
@@ -12,6 +13,7 @@ from huggingface_hub.repocard_data import (
|
|
|
12
13
|
DatasetCardData,
|
|
13
14
|
EvalResult,
|
|
14
15
|
ModelCardData,
|
|
16
|
+
SpaceCardData,
|
|
15
17
|
eval_results_to_model_index,
|
|
16
18
|
model_index_to_eval_results,
|
|
17
19
|
)
|
|
@@ -24,9 +26,7 @@ from .utils.logging import get_logger
|
|
|
24
26
|
|
|
25
27
|
|
|
26
28
|
TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md"
|
|
27
|
-
TEMPLATE_DATASETCARD_PATH = (
|
|
28
|
-
Path(__file__).parent / "templates" / "datasetcard_template.md"
|
|
29
|
-
)
|
|
29
|
+
TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md"
|
|
30
30
|
|
|
31
31
|
# exact same regex as in the Hub server. Please keep in sync.
|
|
32
32
|
# See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18
|
|
@@ -40,7 +40,7 @@ class RepoCard:
|
|
|
40
40
|
default_template_path = TEMPLATE_MODELCARD_PATH
|
|
41
41
|
repo_type = "model"
|
|
42
42
|
|
|
43
|
-
def __init__(self, content: str):
|
|
43
|
+
def __init__(self, content: str, ignore_metadata_errors: bool = False):
|
|
44
44
|
"""Initialize a RepoCard from string content. The content should be a
|
|
45
45
|
Markdown file with a YAML block at the beginning and a Markdown body.
|
|
46
46
|
|
|
@@ -76,6 +76,7 @@ class RepoCard:
|
|
|
76
76
|
|
|
77
77
|
# Set the content of the RepoCard, as well as underlying .data and .text attributes.
|
|
78
78
|
# See the `content` property setter for more details.
|
|
79
|
+
self.ignore_metadata_errors = ignore_metadata_errors
|
|
79
80
|
self.content = content
|
|
80
81
|
|
|
81
82
|
@property
|
|
@@ -101,13 +102,11 @@ class RepoCard:
|
|
|
101
102
|
raise ValueError("repo card metadata block should be a dict")
|
|
102
103
|
else:
|
|
103
104
|
# Model card without metadata... create empty metadata
|
|
104
|
-
logger.warning(
|
|
105
|
-
"Repo card metadata block was not found. Setting CardData to empty."
|
|
106
|
-
)
|
|
105
|
+
logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
|
|
107
106
|
data_dict = {}
|
|
108
107
|
self.text = content
|
|
109
108
|
|
|
110
|
-
self.data = self.card_data_class(**data_dict)
|
|
109
|
+
self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
|
|
111
110
|
|
|
112
111
|
def __str__(self):
|
|
113
112
|
return self.content
|
|
@@ -138,6 +137,7 @@ class RepoCard:
|
|
|
138
137
|
repo_id_or_path: Union[str, Path],
|
|
139
138
|
repo_type: Optional[str] = None,
|
|
140
139
|
token: Optional[str] = None,
|
|
140
|
+
ignore_metadata_errors: bool = False,
|
|
141
141
|
):
|
|
142
142
|
"""Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.
|
|
143
143
|
|
|
@@ -145,13 +145,14 @@ class RepoCard:
|
|
|
145
145
|
repo_id_or_path (`Union[str, Path]`):
|
|
146
146
|
The repo ID associated with a Hugging Face Hub repo or a local filepath.
|
|
147
147
|
repo_type (`str`, *optional*):
|
|
148
|
-
The type of Hugging Face repo to push to. Defaults to None, which will use
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
the child class's `repo_type`.
|
|
148
|
+
The type of Hugging Face repo to push to. Defaults to None, which will use "model". Other options
|
|
149
|
+
are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
|
|
150
|
+
class, the default value will be the child class's `repo_type`.
|
|
152
151
|
token (`str`, *optional*):
|
|
153
|
-
Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
|
|
154
|
-
|
|
152
|
+
Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
|
|
153
|
+
ignore_metadata_errors (`bool`, *optional*, defaults to `False`):
|
|
154
|
+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
|
|
155
|
+
the process. Use it at your own risk.
|
|
155
156
|
|
|
156
157
|
Returns:
|
|
157
158
|
[`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
|
|
@@ -176,13 +177,11 @@ class RepoCard:
|
|
|
176
177
|
token=token,
|
|
177
178
|
)
|
|
178
179
|
else:
|
|
179
|
-
raise ValueError(
|
|
180
|
-
f"Cannot load RepoCard: path not found on disk ({repo_id_or_path})."
|
|
181
|
-
)
|
|
180
|
+
raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")
|
|
182
181
|
|
|
183
182
|
# Preserve newlines in the existing file.
|
|
184
183
|
with Path(card_path).open(mode="r", newline="", encoding="utf-8") as f:
|
|
185
|
-
return cls(f.read())
|
|
184
|
+
return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)
|
|
186
185
|
|
|
187
186
|
def validate(self, repo_type: Optional[str] = None):
|
|
188
187
|
"""Validates card against Hugging Face Hub's card validation logic.
|
|
@@ -215,9 +214,7 @@ class RepoCard:
|
|
|
215
214
|
headers = {"Accept": "text/plain"}
|
|
216
215
|
|
|
217
216
|
try:
|
|
218
|
-
r = requests.post(
|
|
219
|
-
"https://huggingface.co/api/validate-yaml", body, headers=headers
|
|
220
|
-
)
|
|
217
|
+
r = requests.post("https://huggingface.co/api/validate-yaml", body, headers=headers)
|
|
221
218
|
r.raise_for_status()
|
|
222
219
|
except requests.exceptions.HTTPError as exc:
|
|
223
220
|
if r.status_code == 400:
|
|
@@ -321,9 +318,7 @@ class RepoCard:
|
|
|
321
318
|
|
|
322
319
|
kwargs = card_data.to_dict().copy()
|
|
323
320
|
kwargs.update(template_kwargs) # Template_kwargs have priority
|
|
324
|
-
template = jinja2.Template(
|
|
325
|
-
Path(template_path or cls.default_template_path).read_text()
|
|
326
|
-
)
|
|
321
|
+
template = jinja2.Template(Path(template_path or cls.default_template_path).read_text())
|
|
327
322
|
content = template.render(card_data=card_data.to_yaml(), **kwargs)
|
|
328
323
|
return cls(content)
|
|
329
324
|
|
|
@@ -367,7 +362,7 @@ class ModelCard(RepoCard):
|
|
|
367
362
|
... license='mit',
|
|
368
363
|
... library_name='timm',
|
|
369
364
|
... tags=['image-classification', 'resnet'],
|
|
370
|
-
... datasets='beans',
|
|
365
|
+
... datasets=['beans'],
|
|
371
366
|
... metrics=['accuracy'],
|
|
372
367
|
... )
|
|
373
368
|
>>> card = ModelCard.from_template(
|
|
@@ -472,10 +467,16 @@ class DatasetCard(RepoCard):
|
|
|
472
467
|
return super().from_template(card_data, template_path, **template_kwargs)
|
|
473
468
|
|
|
474
469
|
|
|
475
|
-
|
|
470
|
+
class SpaceCard(RepoCard):
|
|
471
|
+
card_data_class = SpaceCardData
|
|
472
|
+
default_template_path = TEMPLATE_MODELCARD_PATH
|
|
473
|
+
repo_type = "space"
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: # noqa: F722
|
|
476
477
|
"""Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines.
|
|
477
478
|
|
|
478
|
-
Uses same
|
|
479
|
+
Uses same implementation as in Hub server, keep it in sync.
|
|
479
480
|
|
|
480
481
|
Returns:
|
|
481
482
|
str: The detected line ending of the string.
|
|
@@ -531,11 +532,7 @@ def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
|
|
|
531
532
|
# sort_keys: keep dict order
|
|
532
533
|
match = REGEX_YAML_BLOCK.search(content)
|
|
533
534
|
if match:
|
|
534
|
-
output = (
|
|
535
|
-
content[: match.start()]
|
|
536
|
-
+ f"---{line_break}{data_yaml}---{line_break}"
|
|
537
|
-
+ content[match.end() :]
|
|
538
|
-
)
|
|
535
|
+
output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
|
|
539
536
|
else:
|
|
540
537
|
output = f"---{line_break}{data_yaml}---{line_break}{content}"
|
|
541
538
|
|
|
@@ -739,11 +736,7 @@ def metadata_update(
|
|
|
739
736
|
|
|
740
737
|
```
|
|
741
738
|
"""
|
|
742
|
-
commit_message =
|
|
743
|
-
commit_message
|
|
744
|
-
if commit_message is not None
|
|
745
|
-
else "Update metadata with huggingface_hub"
|
|
746
|
-
)
|
|
739
|
+
commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"
|
|
747
740
|
|
|
748
741
|
# Card class given repo_type
|
|
749
742
|
card_class: Type[RepoCard]
|
|
@@ -762,10 +755,7 @@ def metadata_update(
|
|
|
762
755
|
card = card_class.load(repo_id, token=token, repo_type=repo_type)
|
|
763
756
|
except EntryNotFoundError:
|
|
764
757
|
if repo_type == "space":
|
|
765
|
-
raise ValueError(
|
|
766
|
-
"Cannot update metadata on a Space that doesn't contain a `README.md`"
|
|
767
|
-
" file."
|
|
768
|
-
)
|
|
758
|
+
raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")
|
|
769
759
|
|
|
770
760
|
# Initialize a ModelCard or DatasetCard from default template and no data.
|
|
771
761
|
card = card_class.from_template(CardData())
|
|
@@ -807,18 +797,13 @@ def metadata_update(
|
|
|
807
797
|
card.data.eval_results.append(new_result)
|
|
808
798
|
else:
|
|
809
799
|
# Any metadata that is not a result metric
|
|
810
|
-
if (
|
|
811
|
-
hasattr(card.data, key)
|
|
812
|
-
and getattr(card.data, key) is not None
|
|
813
|
-
and not overwrite
|
|
814
|
-
and getattr(card.data, key) != value
|
|
815
|
-
):
|
|
800
|
+
if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
|
|
816
801
|
raise ValueError(
|
|
817
802
|
f"You passed a new value for the existing meta data field '{key}'."
|
|
818
803
|
" Set `overwrite=True` to overwrite existing metadata."
|
|
819
804
|
)
|
|
820
805
|
else:
|
|
821
|
-
|
|
806
|
+
card.data[key] = value
|
|
822
807
|
|
|
823
808
|
return card.push_to_hub(
|
|
824
809
|
repo_id,
|