huggingface-hub 0.12.0rc0__py3-none-any.whl → 0.13.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +166 -126
- huggingface_hub/_commit_api.py +25 -51
- huggingface_hub/_login.py +4 -13
- huggingface_hub/_snapshot_download.py +45 -23
- huggingface_hub/_space_api.py +7 -0
- huggingface_hub/commands/delete_cache.py +13 -39
- huggingface_hub/commands/env.py +1 -3
- huggingface_hub/commands/huggingface_cli.py +1 -3
- huggingface_hub/commands/lfs.py +4 -8
- huggingface_hub/commands/scan_cache.py +5 -16
- huggingface_hub/commands/user.py +27 -45
- huggingface_hub/community.py +4 -4
- huggingface_hub/constants.py +22 -19
- huggingface_hub/fastai_utils.py +14 -23
- huggingface_hub/file_download.py +210 -121
- huggingface_hub/hf_api.py +500 -255
- huggingface_hub/hub_mixin.py +181 -176
- huggingface_hub/inference_api.py +4 -10
- huggingface_hub/keras_mixin.py +39 -71
- huggingface_hub/lfs.py +8 -24
- huggingface_hub/repocard.py +33 -48
- huggingface_hub/repocard_data.py +141 -30
- huggingface_hub/repository.py +41 -112
- huggingface_hub/templates/modelcard_template.md +39 -34
- huggingface_hub/utils/__init__.py +1 -0
- huggingface_hub/utils/_cache_assets.py +1 -4
- huggingface_hub/utils/_cache_manager.py +17 -39
- huggingface_hub/utils/_deprecation.py +8 -12
- huggingface_hub/utils/_errors.py +10 -57
- huggingface_hub/utils/_fixes.py +2 -6
- huggingface_hub/utils/_git_credential.py +5 -16
- huggingface_hub/utils/_headers.py +22 -11
- huggingface_hub/utils/_http.py +1 -4
- huggingface_hub/utils/_paths.py +5 -12
- huggingface_hub/utils/_runtime.py +2 -1
- huggingface_hub/utils/_telemetry.py +120 -0
- huggingface_hub/utils/_validators.py +5 -13
- huggingface_hub/utils/endpoint_helpers.py +1 -3
- huggingface_hub/utils/logging.py +10 -8
- {huggingface_hub-0.12.0rc0.dist-info → huggingface_hub-0.13.0rc0.dist-info}/METADATA +7 -14
- huggingface_hub-0.13.0rc0.dist-info/RECORD +56 -0
- huggingface_hub/py.typed +0 -0
- huggingface_hub-0.12.0rc0.dist-info/RECORD +0 -56
- {huggingface_hub-0.12.0rc0.dist-info → huggingface_hub-0.13.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.12.0rc0.dist-info → huggingface_hub-0.13.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.12.0rc0.dist-info → huggingface_hub-0.13.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.12.0rc0.dist-info → huggingface_hub-0.13.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/repocard_data.py
CHANGED
|
@@ -149,7 +149,16 @@ class EvalResult:
|
|
|
149
149
|
|
|
150
150
|
@dataclass
|
|
151
151
|
class CardData:
|
|
152
|
-
|
|
152
|
+
"""Structure containing metadata from a RepoCard.
|
|
153
|
+
|
|
154
|
+
[`CardData`] is the parent class of [`ModelCardData`] and [`DatasetCardData`].
|
|
155
|
+
|
|
156
|
+
Metadata can be exported as a dictionary or YAML. Export can be customized to alter the representation of the data
|
|
157
|
+
(example: flatten evaluation results). `CardData` behaves as a dictionary (can get, pop, set values) but does not
|
|
158
|
+
inherit from `dict` to allow this export step.
|
|
159
|
+
"""
|
|
160
|
+
|
|
161
|
+
def __init__(self, ignore_metadata_errors: bool = False, **kwargs):
|
|
153
162
|
self.__dict__.update(kwargs)
|
|
154
163
|
|
|
155
164
|
def to_dict(self) -> Dict[str, Any]:
|
|
@@ -187,6 +196,26 @@ class CardData:
|
|
|
187
196
|
def __repr__(self):
|
|
188
197
|
return self.to_yaml()
|
|
189
198
|
|
|
199
|
+
def get(self, key: str, default: Any = None) -> Any:
|
|
200
|
+
"""Get value for a given metadata key."""
|
|
201
|
+
return self.__dict__.get(key, default)
|
|
202
|
+
|
|
203
|
+
def pop(self, key: str, default: Any = None) -> Any:
|
|
204
|
+
"""Pop value for a given metadata key."""
|
|
205
|
+
return self.__dict__.pop(key, default)
|
|
206
|
+
|
|
207
|
+
def __getitem__(self, key: str) -> Any:
|
|
208
|
+
"""Get value for a given metadata key."""
|
|
209
|
+
return self.__dict__[key]
|
|
210
|
+
|
|
211
|
+
def __setitem__(self, key: str, value: Any) -> None:
|
|
212
|
+
"""Set value for a given metadata key."""
|
|
213
|
+
self.__dict__[key] = value
|
|
214
|
+
|
|
215
|
+
def __contains__(self, key: str) -> bool:
|
|
216
|
+
"""Check if a given metadata key is set."""
|
|
217
|
+
return key in self.__dict__
|
|
218
|
+
|
|
190
219
|
|
|
191
220
|
class ModelCardData(CardData):
|
|
192
221
|
"""Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
|
|
@@ -205,10 +234,10 @@ class ModelCardData(CardData):
|
|
|
205
234
|
tags (`List[str]`, *optional*):
|
|
206
235
|
List of tags to add to your model that can be used when filtering on the Hugging
|
|
207
236
|
Face Hub. Defaults to None.
|
|
208
|
-
datasets (`
|
|
209
|
-
|
|
237
|
+
datasets (`List[str]`, *optional*):
|
|
238
|
+
List of datasets that were used to train this model. Should be a dataset ID
|
|
210
239
|
found on https://hf.co/datasets. Defaults to None.
|
|
211
|
-
metrics (`
|
|
240
|
+
metrics (`List[str]`, *optional*):
|
|
212
241
|
List of metrics used to evaluate this model. Should be a metric name that can be found
|
|
213
242
|
at https://hf.co/metrics. Example: 'accuracy'. Defaults to None.
|
|
214
243
|
eval_results (`Union[List[EvalResult], EvalResult]`, *optional*):
|
|
@@ -219,6 +248,9 @@ class ModelCardData(CardData):
|
|
|
219
248
|
`eval_results` to construct the `model-index` within the card's metadata. The name
|
|
220
249
|
you supply here is what will be used on PapersWithCode's leaderboards. If None is provided
|
|
221
250
|
then the repo name is used as a default. Defaults to None.
|
|
251
|
+
ignore_metadata_errors (`bool`):
|
|
252
|
+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
|
|
253
|
+
the process. Use it at your own risk.
|
|
222
254
|
kwargs (`dict`, *optional*):
|
|
223
255
|
Additional metadata that will be added to the model card. Defaults to None.
|
|
224
256
|
|
|
@@ -244,10 +276,11 @@ class ModelCardData(CardData):
|
|
|
244
276
|
license: Optional[str] = None,
|
|
245
277
|
library_name: Optional[str] = None,
|
|
246
278
|
tags: Optional[List[str]] = None,
|
|
247
|
-
datasets: Optional[
|
|
248
|
-
metrics: Optional[
|
|
279
|
+
datasets: Optional[List[str]] = None,
|
|
280
|
+
metrics: Optional[List[str]] = None,
|
|
249
281
|
eval_results: Optional[List[EvalResult]] = None,
|
|
250
282
|
model_name: Optional[str] = None,
|
|
283
|
+
ignore_metadata_errors: bool = False,
|
|
251
284
|
**kwargs,
|
|
252
285
|
):
|
|
253
286
|
self.language = language
|
|
@@ -265,10 +298,15 @@ class ModelCardData(CardData):
|
|
|
265
298
|
model_name, eval_results = model_index_to_eval_results(model_index)
|
|
266
299
|
self.model_name = model_name
|
|
267
300
|
self.eval_results = eval_results
|
|
268
|
-
except KeyError:
|
|
269
|
-
|
|
270
|
-
"Invalid model-index. Not loading eval results into CardData."
|
|
271
|
-
|
|
301
|
+
except KeyError as error:
|
|
302
|
+
if ignore_metadata_errors:
|
|
303
|
+
logger.warning("Invalid model-index. Not loading eval results into CardData.")
|
|
304
|
+
else:
|
|
305
|
+
raise ValueError(
|
|
306
|
+
f"Invalid `model_index` in metadata cannot be parsed: KeyError {error}. Pass"
|
|
307
|
+
" `ignore_metadata_errors=True` to ignore this error while loading a Model Card. Warning:"
|
|
308
|
+
" some information will be lost. Use it at your own risk."
|
|
309
|
+
)
|
|
272
310
|
|
|
273
311
|
super().__init__(**kwargs)
|
|
274
312
|
|
|
@@ -276,16 +314,12 @@ class ModelCardData(CardData):
|
|
|
276
314
|
if type(self.eval_results) == EvalResult:
|
|
277
315
|
self.eval_results = [self.eval_results]
|
|
278
316
|
if self.model_name is None:
|
|
279
|
-
raise ValueError(
|
|
280
|
-
"Passing `eval_results` requires `model_name` to be set."
|
|
281
|
-
)
|
|
317
|
+
raise ValueError("Passing `eval_results` requires `model_name` to be set.")
|
|
282
318
|
|
|
283
319
|
def _to_dict(self, data_dict):
|
|
284
320
|
"""Format the internal data dict. In this case, we convert eval results to a valid model index"""
|
|
285
321
|
if self.eval_results is not None:
|
|
286
|
-
data_dict["model-index"] = eval_results_to_model_index(
|
|
287
|
-
self.model_name, self.eval_results
|
|
288
|
-
)
|
|
322
|
+
data_dict["model-index"] = eval_results_to_model_index(self.model_name, self.eval_results)
|
|
289
323
|
del data_dict["eval_results"], data_dict["model_name"]
|
|
290
324
|
|
|
291
325
|
|
|
@@ -293,7 +327,7 @@ class DatasetCardData(CardData):
|
|
|
293
327
|
"""Dataset Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
|
|
294
328
|
|
|
295
329
|
Args:
|
|
296
|
-
language (`
|
|
330
|
+
language (`List[str]`, *optional*):
|
|
297
331
|
Language of dataset's data or metadata. It must be an ISO 639-1, 639-2 or
|
|
298
332
|
639-3 code (two/three letters), or a special value like "code", "multilingual".
|
|
299
333
|
license (`Union[str, List[str]]`, *optional*):
|
|
@@ -311,7 +345,7 @@ class DatasetCardData(CardData):
|
|
|
311
345
|
size_categories (`Union[str, List[str]]`, *optional*):
|
|
312
346
|
The number of examples in the dataset. Options are: 'n<1K', '1K<n<10K', '10K<n<100K',
|
|
313
347
|
'100K<n<1M', '1M<n<10M', '10M<n<100M', '100M<n<1B', '1B<n<10B', '10B<n<100B', '100B<n<1T', 'n>1T', and 'other'.
|
|
314
|
-
source_datasets (`
|
|
348
|
+
source_datasets (`List[str]`, *optional*):
|
|
315
349
|
Indicates whether the dataset is an original dataset or extended from another existing dataset.
|
|
316
350
|
Options are: 'original' and 'extended'.
|
|
317
351
|
task_categories (`Union[str, List[str]]`, *optional*):
|
|
@@ -327,6 +361,9 @@ class DatasetCardData(CardData):
|
|
|
327
361
|
If not provided, it will be gathered from the 'train-eval-index' key of the kwargs.
|
|
328
362
|
configs (`Union[str, List[str]]`, *optional*):
|
|
329
363
|
A list of the available dataset configs for the dataset.
|
|
364
|
+
ignore_metadata_errors (`bool`):
|
|
365
|
+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
|
|
366
|
+
the process. Use it at your own risk.
|
|
330
367
|
"""
|
|
331
368
|
|
|
332
369
|
def __init__(
|
|
@@ -338,13 +375,14 @@ class DatasetCardData(CardData):
|
|
|
338
375
|
language_creators: Optional[Union[str, List[str]]] = None,
|
|
339
376
|
multilinguality: Optional[Union[str, List[str]]] = None,
|
|
340
377
|
size_categories: Optional[Union[str, List[str]]] = None,
|
|
341
|
-
source_datasets: Optional[
|
|
378
|
+
source_datasets: Optional[List[str]] = None,
|
|
342
379
|
task_categories: Optional[Union[str, List[str]]] = None,
|
|
343
380
|
task_ids: Optional[Union[str, List[str]]] = None,
|
|
344
381
|
paperswithcode_id: Optional[str] = None,
|
|
345
382
|
pretty_name: Optional[str] = None,
|
|
346
383
|
train_eval_index: Optional[Dict] = None,
|
|
347
384
|
configs: Optional[Union[str, List[str]]] = None,
|
|
385
|
+
ignore_metadata_errors: bool = False,
|
|
348
386
|
**kwargs,
|
|
349
387
|
):
|
|
350
388
|
self.annotations_creators = annotations_creators
|
|
@@ -368,9 +406,88 @@ class DatasetCardData(CardData):
|
|
|
368
406
|
data_dict["train-eval-index"] = data_dict.pop("train_eval_index")
|
|
369
407
|
|
|
370
408
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
409
|
+
class SpaceCardData(CardData):
|
|
410
|
+
"""Space Card Metadata that is used by Hugging Face Hub when included at the top of your README.md
|
|
411
|
+
|
|
412
|
+
To get an exhaustive reference of Spaces configuration, please visit https://huggingface.co/docs/hub/spaces-config-reference#spaces-configuration-reference.
|
|
413
|
+
|
|
414
|
+
Args:
|
|
415
|
+
title (`str`, *optional*)
|
|
416
|
+
Title of the Space.
|
|
417
|
+
sdk (`str`, *optional*)
|
|
418
|
+
SDK of the Space (one of `gradio`, `streamlit`, `docker`, or `static`).
|
|
419
|
+
sdk_version (`str`, *optional*)
|
|
420
|
+
Version of the used SDK (if Gradio/Streamlit sdk).
|
|
421
|
+
python_version (`str`, *optional*)
|
|
422
|
+
Python version used in the Space (if Gradio/Streamlit sdk).
|
|
423
|
+
app_file (`str`, *optional*)
|
|
424
|
+
Path to your main application file (which contains either gradio or streamlit Python code, or static html code).
|
|
425
|
+
Path is relative to the root of the repository.
|
|
426
|
+
app_port (`int`, *optional*)
|
|
427
|
+
Port on which your application is running. Used only if sdk is `docker`.
|
|
428
|
+
license (`str`, *optional*)
|
|
429
|
+
License of this Space. Example: apache-2.0 or any license from
|
|
430
|
+
https://huggingface.co/docs/hub/repositories-licenses.
|
|
431
|
+
duplicated_from (`str`, *optional*)
|
|
432
|
+
ID of the original Space if this is a duplicated Space.
|
|
433
|
+
models (`List[str]`, *optional*)
|
|
434
|
+
List of models related to this Space. Should be a model ID found on https://hf.co/models.
|
|
435
|
+
datasets (`List[str]`, *optional*)
|
|
436
|
+
List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets.
|
|
437
|
+
tags (`List[str]`, *optional*)
|
|
438
|
+
List of tags to add to your Space that can be used when filtering on the Hub.
|
|
439
|
+
ignore_metadata_errors (`bool`):
|
|
440
|
+
If True, errors while parsing the metadata section will be ignored. Some information might be lost during
|
|
441
|
+
the process. Use it at your own risk.
|
|
442
|
+
kwargs (`dict`, *optional*):
|
|
443
|
+
Additional metadata that will be added to the space card.
|
|
444
|
+
|
|
445
|
+
Example:
|
|
446
|
+
```python
|
|
447
|
+
>>> from huggingface_hub import SpaceCardData
|
|
448
|
+
>>> card_data = SpaceCardData(
|
|
449
|
+
... title="Dreambooth Training",
|
|
450
|
+
... license="mit",
|
|
451
|
+
... sdk="gradio",
|
|
452
|
+
... duplicated_from="multimodalart/dreambooth-training"
|
|
453
|
+
... )
|
|
454
|
+
>>> card_data.to_dict()
|
|
455
|
+
{'title': 'Dreambooth Training', 'sdk': 'gradio', 'license': 'mit', 'duplicated_from': 'multimodalart/dreambooth-training'}
|
|
456
|
+
```
|
|
457
|
+
"""
|
|
458
|
+
|
|
459
|
+
def __init__(
|
|
460
|
+
self,
|
|
461
|
+
*,
|
|
462
|
+
title: Optional[str] = None,
|
|
463
|
+
sdk: Optional[str] = None,
|
|
464
|
+
sdk_version: Optional[str] = None,
|
|
465
|
+
python_version: Optional[str] = None,
|
|
466
|
+
app_file: Optional[str] = None,
|
|
467
|
+
app_port: Optional[int] = None,
|
|
468
|
+
license: Optional[str] = None,
|
|
469
|
+
duplicated_from: Optional[str] = None,
|
|
470
|
+
models: Optional[List[str]] = None,
|
|
471
|
+
datasets: Optional[List[str]] = None,
|
|
472
|
+
tags: Optional[List[str]] = None,
|
|
473
|
+
ignore_metadata_errors: bool = False,
|
|
474
|
+
**kwargs,
|
|
475
|
+
):
|
|
476
|
+
self.title = title
|
|
477
|
+
self.sdk = sdk
|
|
478
|
+
self.sdk_version = sdk_version
|
|
479
|
+
self.python_version = python_version
|
|
480
|
+
self.app_file = app_file
|
|
481
|
+
self.app_port = app_port
|
|
482
|
+
self.license = license
|
|
483
|
+
self.duplicated_from = duplicated_from
|
|
484
|
+
self.models = models
|
|
485
|
+
self.datasets = datasets
|
|
486
|
+
self.tags = tags
|
|
487
|
+
super().__init__(**kwargs)
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, List[EvalResult]]:
|
|
374
491
|
"""Takes in a model index and returns the model name and a list of `huggingface_hub.EvalResult` objects.
|
|
375
492
|
|
|
376
493
|
A detailed spec of the model index can be found here:
|
|
@@ -477,18 +594,12 @@ def _remove_none(obj):
|
|
|
477
594
|
if isinstance(obj, (list, tuple, set)):
|
|
478
595
|
return type(obj)(_remove_none(x) for x in obj if x is not None)
|
|
479
596
|
elif isinstance(obj, dict):
|
|
480
|
-
return type(obj)(
|
|
481
|
-
(_remove_none(k), _remove_none(v))
|
|
482
|
-
for k, v in obj.items()
|
|
483
|
-
if k is not None and v is not None
|
|
484
|
-
)
|
|
597
|
+
return type(obj)((_remove_none(k), _remove_none(v)) for k, v in obj.items() if k is not None and v is not None)
|
|
485
598
|
else:
|
|
486
599
|
return obj
|
|
487
600
|
|
|
488
601
|
|
|
489
|
-
def eval_results_to_model_index(
|
|
490
|
-
model_name: str, eval_results: List[EvalResult]
|
|
491
|
-
) -> List[Dict[str, Any]]:
|
|
602
|
+
def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]:
|
|
492
603
|
"""Takes in given model name and list of `huggingface_hub.EvalResult` and returns a
|
|
493
604
|
valid model-index that will be compatible with the format expected by the
|
|
494
605
|
Hugging Face Hub.
|
huggingface_hub/repository.py
CHANGED
|
@@ -124,9 +124,7 @@ def is_git_repo(folder: Union[str, Path]) -> bool:
|
|
|
124
124
|
otherwise.
|
|
125
125
|
"""
|
|
126
126
|
folder_exists = os.path.exists(os.path.join(folder, ".git"))
|
|
127
|
-
git_branch = subprocess.run(
|
|
128
|
-
"git branch".split(), cwd=folder, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
|
129
|
-
)
|
|
127
|
+
git_branch = subprocess.run("git branch".split(), cwd=folder, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
130
128
|
return folder_exists and git_branch.returncode == 0
|
|
131
129
|
|
|
132
130
|
|
|
@@ -234,17 +232,13 @@ def is_binary_file(filename: Union[str, Path]) -> bool:
|
|
|
234
232
|
|
|
235
233
|
# Code sample taken from the following stack overflow thread
|
|
236
234
|
# https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python/7392391#7392391
|
|
237
|
-
text_chars = bytearray(
|
|
238
|
-
{7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F}
|
|
239
|
-
)
|
|
235
|
+
text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
|
|
240
236
|
return bool(content.translate(None, text_chars))
|
|
241
237
|
except UnicodeDecodeError:
|
|
242
238
|
return True
|
|
243
239
|
|
|
244
240
|
|
|
245
|
-
def files_to_be_staged(
|
|
246
|
-
pattern: str = ".", folder: Union[str, Path, None] = None
|
|
247
|
-
) -> List[str]:
|
|
241
|
+
def files_to_be_staged(pattern: str = ".", folder: Union[str, Path, None] = None) -> List[str]:
|
|
248
242
|
"""
|
|
249
243
|
Returns a list of filenames that are to be staged.
|
|
250
244
|
|
|
@@ -258,7 +252,7 @@ def files_to_be_staged(
|
|
|
258
252
|
`List[str]`: List of files that are to be staged.
|
|
259
253
|
"""
|
|
260
254
|
try:
|
|
261
|
-
p = run_subprocess("git ls-files -mo".split() + [pattern], folder)
|
|
255
|
+
p = run_subprocess("git ls-files --exclude-standard -mo".split() + [pattern], folder)
|
|
262
256
|
if len(p.stdout.strip()):
|
|
263
257
|
files = p.stdout.strip().split("\n")
|
|
264
258
|
else:
|
|
@@ -379,7 +373,11 @@ def _lfs_log_progress():
|
|
|
379
373
|
time.sleep(2)
|
|
380
374
|
|
|
381
375
|
for line in tail_file(os.environ["GIT_LFS_PROGRESS"]):
|
|
382
|
-
|
|
376
|
+
try:
|
|
377
|
+
state, file_progress, byte_progress, filename = line.split()
|
|
378
|
+
except ValueError as error:
|
|
379
|
+
# Try/except to ease debugging. See https://github.com/huggingface/huggingface_hub/issues/1373.
|
|
380
|
+
raise ValueError(f"Cannot unpack LFS progress line:\n{line}") from error
|
|
383
381
|
description = f"{state.capitalize()} file {filename}"
|
|
384
382
|
|
|
385
383
|
current_bytes, total_bytes = byte_progress.split("/")
|
|
@@ -520,14 +518,9 @@ class Repository:
|
|
|
520
518
|
if is_git_repo(self.local_dir):
|
|
521
519
|
logger.debug("[Repository] is a valid git repo")
|
|
522
520
|
else:
|
|
523
|
-
raise ValueError(
|
|
524
|
-
"If not specifying `clone_from`, you need to pass Repository a"
|
|
525
|
-
" valid git clone."
|
|
526
|
-
)
|
|
521
|
+
raise ValueError("If not specifying `clone_from`, you need to pass Repository a valid git clone.")
|
|
527
522
|
|
|
528
|
-
if self.huggingface_token is not None and (
|
|
529
|
-
git_email is None or git_user is None
|
|
530
|
-
):
|
|
523
|
+
if self.huggingface_token is not None and (git_email is None or git_user is None):
|
|
531
524
|
user = self.client.whoami(self.huggingface_token)
|
|
532
525
|
|
|
533
526
|
if git_email is None:
|
|
@@ -558,9 +551,7 @@ class Repository:
|
|
|
558
551
|
`str`: Current checked out branch.
|
|
559
552
|
"""
|
|
560
553
|
try:
|
|
561
|
-
result = run_subprocess(
|
|
562
|
-
"git rev-parse --abbrev-ref HEAD", self.local_dir
|
|
563
|
-
).stdout.strip()
|
|
554
|
+
result = run_subprocess("git rev-parse --abbrev-ref HEAD", self.local_dir).stdout.strip()
|
|
564
555
|
except subprocess.CalledProcessError as exc:
|
|
565
556
|
raise EnvironmentError(exc.stderr)
|
|
566
557
|
|
|
@@ -577,14 +568,10 @@ class Repository:
|
|
|
577
568
|
try:
|
|
578
569
|
git_version = run_subprocess("git --version", self.local_dir).stdout.strip()
|
|
579
570
|
except FileNotFoundError:
|
|
580
|
-
raise EnvironmentError(
|
|
581
|
-
"Looks like you do not have git installed, please install."
|
|
582
|
-
)
|
|
571
|
+
raise EnvironmentError("Looks like you do not have git installed, please install.")
|
|
583
572
|
|
|
584
573
|
try:
|
|
585
|
-
lfs_version = run_subprocess(
|
|
586
|
-
"git-lfs --version", self.local_dir
|
|
587
|
-
).stdout.strip()
|
|
574
|
+
lfs_version = run_subprocess("git-lfs --version", self.local_dir).stdout.strip()
|
|
588
575
|
except FileNotFoundError:
|
|
589
576
|
raise EnvironmentError(
|
|
590
577
|
"Looks like you do not have git-lfs installed, please install."
|
|
@@ -645,12 +632,8 @@ class Repository:
|
|
|
645
632
|
)
|
|
646
633
|
|
|
647
634
|
hub_url = self.client.endpoint
|
|
648
|
-
if hub_url in repo_url or (
|
|
649
|
-
|
|
650
|
-
):
|
|
651
|
-
repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(
|
|
652
|
-
repo_url, hub_url=hub_url
|
|
653
|
-
)
|
|
635
|
+
if hub_url in repo_url or ("http" not in repo_url and len(repo_url.split("/")) <= 2):
|
|
636
|
+
repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(repo_url, hub_url=hub_url)
|
|
654
637
|
repo_id = f"{namespace}/{repo_name}" if namespace is not None else repo_name
|
|
655
638
|
|
|
656
639
|
if repo_type is not None:
|
|
@@ -710,9 +693,7 @@ class Repository:
|
|
|
710
693
|
" `repo.git_pull()`."
|
|
711
694
|
)
|
|
712
695
|
else:
|
|
713
|
-
output = run_subprocess(
|
|
714
|
-
"git remote get-url origin", self.local_dir, check=False
|
|
715
|
-
)
|
|
696
|
+
output = run_subprocess("git remote get-url origin", self.local_dir, check=False)
|
|
716
697
|
|
|
717
698
|
error_msg = (
|
|
718
699
|
f"Tried to clone {clean_repo_url} in an unrelated git"
|
|
@@ -720,21 +701,14 @@ class Repository:
|
|
|
720
701
|
f" a remote with the following URL: {clean_repo_url}."
|
|
721
702
|
)
|
|
722
703
|
if output.returncode == 0:
|
|
723
|
-
clean_local_remote_url = re.sub(
|
|
724
|
-
|
|
725
|
-
)
|
|
726
|
-
error_msg += (
|
|
727
|
-
"\nLocal path has its origin defined as:"
|
|
728
|
-
f" {clean_local_remote_url}"
|
|
729
|
-
)
|
|
704
|
+
clean_local_remote_url = re.sub(r"https://.*@", "https://", output.stdout)
|
|
705
|
+
error_msg += f"\nLocal path has its origin defined as: {clean_local_remote_url}"
|
|
730
706
|
raise EnvironmentError(error_msg)
|
|
731
707
|
|
|
732
708
|
except subprocess.CalledProcessError as exc:
|
|
733
709
|
raise EnvironmentError(exc.stderr)
|
|
734
710
|
|
|
735
|
-
def git_config_username_and_email(
|
|
736
|
-
self, git_user: Optional[str] = None, git_email: Optional[str] = None
|
|
737
|
-
):
|
|
711
|
+
def git_config_username_and_email(self, git_user: Optional[str] = None, git_email: Optional[str] = None):
|
|
738
712
|
"""
|
|
739
713
|
Sets git username and email (only in the current repo).
|
|
740
714
|
|
|
@@ -746,14 +720,10 @@ class Repository:
|
|
|
746
720
|
"""
|
|
747
721
|
try:
|
|
748
722
|
if git_user is not None:
|
|
749
|
-
run_subprocess(
|
|
750
|
-
"git config user.name".split() + [git_user], self.local_dir
|
|
751
|
-
)
|
|
723
|
+
run_subprocess("git config user.name".split() + [git_user], self.local_dir)
|
|
752
724
|
|
|
753
725
|
if git_email is not None:
|
|
754
|
-
run_subprocess(
|
|
755
|
-
f"git config user.email {git_email}".split(), self.local_dir
|
|
756
|
-
)
|
|
726
|
+
run_subprocess(f"git config user.email {git_email}".split(), self.local_dir)
|
|
757
727
|
except subprocess.CalledProcessError as exc:
|
|
758
728
|
raise EnvironmentError(exc.stderr)
|
|
759
729
|
|
|
@@ -836,14 +806,10 @@ class Repository:
|
|
|
836
806
|
modified_files_statuses = [status.strip() for status in git_status.split("\n")]
|
|
837
807
|
|
|
838
808
|
# Only keep files that are deleted using the D prefix
|
|
839
|
-
deleted_files_statuses = [
|
|
840
|
-
status for status in modified_files_statuses if "D" in status.split()[0]
|
|
841
|
-
]
|
|
809
|
+
deleted_files_statuses = [status for status in modified_files_statuses if "D" in status.split()[0]]
|
|
842
810
|
|
|
843
811
|
# Remove the D prefix and strip to keep only the relevant filename
|
|
844
|
-
deleted_files = [
|
|
845
|
-
status.split()[-1].strip() for status in deleted_files_statuses
|
|
846
|
-
]
|
|
812
|
+
deleted_files = [status.split()[-1].strip() for status in deleted_files_statuses]
|
|
847
813
|
|
|
848
814
|
return deleted_files
|
|
849
815
|
|
|
@@ -969,11 +935,7 @@ class Repository:
|
|
|
969
935
|
path_to_file = os.path.join(os.getcwd(), self.local_dir, filename)
|
|
970
936
|
size_in_mb = os.path.getsize(path_to_file) / (1024 * 1024)
|
|
971
937
|
|
|
972
|
-
if (
|
|
973
|
-
size_in_mb >= 10
|
|
974
|
-
and not is_tracked_with_lfs(path_to_file)
|
|
975
|
-
and not is_git_ignored(path_to_file)
|
|
976
|
-
):
|
|
938
|
+
if size_in_mb >= 10 and not is_tracked_with_lfs(path_to_file) and not is_git_ignored(path_to_file):
|
|
977
939
|
self.lfs_track(filename)
|
|
978
940
|
files_to_be_tracked_with_lfs.append(filename)
|
|
979
941
|
|
|
@@ -995,9 +957,7 @@ class Repository:
|
|
|
995
957
|
"""
|
|
996
958
|
try:
|
|
997
959
|
with _lfs_log_progress():
|
|
998
|
-
result = run_subprocess(
|
|
999
|
-
f"git lfs prune {'--recent' if recent else ''}", self.local_dir
|
|
1000
|
-
)
|
|
960
|
+
result = run_subprocess(f"git lfs prune {'--recent' if recent else ''}", self.local_dir)
|
|
1001
961
|
logger.info(result.stdout)
|
|
1002
962
|
except subprocess.CalledProcessError as exc:
|
|
1003
963
|
raise EnvironmentError(exc.stderr)
|
|
@@ -1069,9 +1029,7 @@ class Repository:
|
|
|
1069
1029
|
The message attributed to the commit.
|
|
1070
1030
|
"""
|
|
1071
1031
|
try:
|
|
1072
|
-
result = run_subprocess(
|
|
1073
|
-
"git commit -v -m".split() + [commit_message], self.local_dir
|
|
1074
|
-
)
|
|
1032
|
+
result = run_subprocess("git commit -v -m".split() + [commit_message], self.local_dir)
|
|
1075
1033
|
logger.info(f"Committed:\n{result.stdout}\n")
|
|
1076
1034
|
except subprocess.CalledProcessError as exc:
|
|
1077
1035
|
if len(exc.stderr) > 0:
|
|
@@ -1115,9 +1073,7 @@ class Repository:
|
|
|
1115
1073
|
number_of_commits = commits_to_push(self.local_dir, upstream)
|
|
1116
1074
|
|
|
1117
1075
|
if number_of_commits > 1:
|
|
1118
|
-
logger.warning(
|
|
1119
|
-
f"Several commits ({number_of_commits}) will be pushed upstream."
|
|
1120
|
-
)
|
|
1076
|
+
logger.warning(f"Several commits ({number_of_commits}) will be pushed upstream.")
|
|
1121
1077
|
if blocking:
|
|
1122
1078
|
logger.warning("The progress bars may be unreliable.")
|
|
1123
1079
|
|
|
@@ -1140,9 +1096,7 @@ class Repository:
|
|
|
1140
1096
|
logger.warning(stderr)
|
|
1141
1097
|
|
|
1142
1098
|
if return_code:
|
|
1143
|
-
raise subprocess.CalledProcessError(
|
|
1144
|
-
return_code, process.args, output=stdout, stderr=stderr
|
|
1145
|
-
)
|
|
1099
|
+
raise subprocess.CalledProcessError(return_code, process.args, output=stdout, stderr=stderr)
|
|
1146
1100
|
|
|
1147
1101
|
except subprocess.CalledProcessError as exc:
|
|
1148
1102
|
raise EnvironmentError(exc.stderr)
|
|
@@ -1197,12 +1151,9 @@ class Repository:
|
|
|
1197
1151
|
raise EnvironmentError(exc.stderr)
|
|
1198
1152
|
else:
|
|
1199
1153
|
try:
|
|
1200
|
-
result = run_subprocess(
|
|
1201
|
-
f"git checkout -b {revision}", self.local_dir
|
|
1202
|
-
)
|
|
1154
|
+
result = run_subprocess(f"git checkout -b {revision}", self.local_dir)
|
|
1203
1155
|
logger.warning(
|
|
1204
|
-
f"Revision `{revision}` does not exist. Created and checked out"
|
|
1205
|
-
f" branch `{revision}`."
|
|
1156
|
+
f"Revision `{revision}` does not exist. Created and checked out branch `{revision}`."
|
|
1206
1157
|
)
|
|
1207
1158
|
logger.warning(result.stdout)
|
|
1208
1159
|
except subprocess.CalledProcessError as exc:
|
|
@@ -1224,9 +1175,7 @@ class Repository:
|
|
|
1224
1175
|
"""
|
|
1225
1176
|
if remote:
|
|
1226
1177
|
try:
|
|
1227
|
-
result = run_subprocess(
|
|
1228
|
-
f"git ls-remote origin refs/tags/{tag_name}", self.local_dir
|
|
1229
|
-
).stdout.strip()
|
|
1178
|
+
result = run_subprocess(f"git ls-remote origin refs/tags/{tag_name}", self.local_dir).stdout.strip()
|
|
1230
1179
|
except subprocess.CalledProcessError as exc:
|
|
1231
1180
|
raise EnvironmentError(exc.stderr)
|
|
1232
1181
|
|
|
@@ -1265,25 +1214,19 @@ class Repository:
|
|
|
1265
1214
|
|
|
1266
1215
|
if delete_locally:
|
|
1267
1216
|
try:
|
|
1268
|
-
run_subprocess(
|
|
1269
|
-
["git", "tag", "-d", tag_name], self.local_dir
|
|
1270
|
-
).stdout.strip()
|
|
1217
|
+
run_subprocess(["git", "tag", "-d", tag_name], self.local_dir).stdout.strip()
|
|
1271
1218
|
except subprocess.CalledProcessError as exc:
|
|
1272
1219
|
raise EnvironmentError(exc.stderr)
|
|
1273
1220
|
|
|
1274
1221
|
if remote and delete_remotely:
|
|
1275
1222
|
try:
|
|
1276
|
-
run_subprocess(
|
|
1277
|
-
f"git push {remote} --delete {tag_name}", self.local_dir
|
|
1278
|
-
).stdout.strip()
|
|
1223
|
+
run_subprocess(f"git push {remote} --delete {tag_name}", self.local_dir).stdout.strip()
|
|
1279
1224
|
except subprocess.CalledProcessError as exc:
|
|
1280
1225
|
raise EnvironmentError(exc.stderr)
|
|
1281
1226
|
|
|
1282
1227
|
return True
|
|
1283
1228
|
|
|
1284
|
-
def add_tag(
|
|
1285
|
-
self, tag_name: str, message: Optional[str] = None, remote: Optional[str] = None
|
|
1286
|
-
):
|
|
1229
|
+
def add_tag(self, tag_name: str, message: Optional[str] = None, remote: Optional[str] = None):
|
|
1287
1230
|
"""
|
|
1288
1231
|
Add a tag at the current head and push it
|
|
1289
1232
|
|
|
@@ -1313,9 +1256,7 @@ class Repository:
|
|
|
1313
1256
|
|
|
1314
1257
|
if remote:
|
|
1315
1258
|
try:
|
|
1316
|
-
run_subprocess(
|
|
1317
|
-
f"git push {remote} {tag_name}", self.local_dir
|
|
1318
|
-
).stdout.strip()
|
|
1259
|
+
run_subprocess(f"git push {remote} {tag_name}", self.local_dir).stdout.strip()
|
|
1319
1260
|
except subprocess.CalledProcessError as exc:
|
|
1320
1261
|
raise EnvironmentError(exc.stderr)
|
|
1321
1262
|
|
|
@@ -1327,9 +1268,7 @@ class Repository:
|
|
|
1327
1268
|
`bool`: `True` if the git status is clean, `False` otherwise.
|
|
1328
1269
|
"""
|
|
1329
1270
|
try:
|
|
1330
|
-
git_status = run_subprocess(
|
|
1331
|
-
"git status --porcelain", self.local_dir
|
|
1332
|
-
).stdout.strip()
|
|
1271
|
+
git_status = run_subprocess("git status --porcelain", self.local_dir).stdout.strip()
|
|
1333
1272
|
except subprocess.CalledProcessError as exc:
|
|
1334
1273
|
raise EnvironmentError(exc.stderr)
|
|
1335
1274
|
|
|
@@ -1446,10 +1385,7 @@ class Repository:
|
|
|
1446
1385
|
logger.warning("Pulling changes ...")
|
|
1447
1386
|
self.git_pull(rebase=True)
|
|
1448
1387
|
else:
|
|
1449
|
-
logger.warning(
|
|
1450
|
-
"The current branch has no upstream branch. Will push to 'origin"
|
|
1451
|
-
f" {self.current_branch}'"
|
|
1452
|
-
)
|
|
1388
|
+
logger.warning(f"The current branch has no upstream branch. Will push to 'origin {self.current_branch}'")
|
|
1453
1389
|
|
|
1454
1390
|
current_working_directory = os.getcwd()
|
|
1455
1391
|
os.chdir(os.path.join(current_working_directory, self.local_dir))
|
|
@@ -1475,10 +1411,7 @@ class Repository:
|
|
|
1475
1411
|
except OSError as e:
|
|
1476
1412
|
# If no changes are detected, there is nothing to commit.
|
|
1477
1413
|
if "could not read Username" in str(e):
|
|
1478
|
-
raise OSError(
|
|
1479
|
-
"Couldn't authenticate user for push. Did you set"
|
|
1480
|
-
" `token` to `True`?"
|
|
1481
|
-
) from e
|
|
1414
|
+
raise OSError("Couldn't authenticate user for push. Did you set `token` to `True`?") from e
|
|
1482
1415
|
else:
|
|
1483
1416
|
raise e
|
|
1484
1417
|
|
|
@@ -1514,17 +1447,13 @@ class Repository:
|
|
|
1514
1447
|
"""
|
|
1515
1448
|
index = 0
|
|
1516
1449
|
for command_failed in self.commands_failed:
|
|
1517
|
-
logger.error(
|
|
1518
|
-
f"The {command_failed.title} command with PID"
|
|
1519
|
-
f" {command_failed._process.pid} failed."
|
|
1520
|
-
)
|
|
1450
|
+
logger.error(f"The {command_failed.title} command with PID {command_failed._process.pid} failed.")
|
|
1521
1451
|
logger.error(command_failed.stderr)
|
|
1522
1452
|
|
|
1523
1453
|
while self.commands_in_progress:
|
|
1524
1454
|
if index % 10 == 0:
|
|
1525
1455
|
logger.error(
|
|
1526
|
-
"Waiting for the following commands to finish before shutting"
|
|
1527
|
-
f" down: {self.commands_in_progress}."
|
|
1456
|
+
f"Waiting for the following commands to finish before shutting down: {self.commands_in_progress}."
|
|
1528
1457
|
)
|
|
1529
1458
|
|
|
1530
1459
|
index += 1
|