datamaestro 1.4.5__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- from abc import abstractmethod
2
1
  import logging
3
2
  from pathlib import Path
4
3
  from typing import Any, Dict
@@ -26,7 +25,6 @@ class Base(Config):
26
25
  """Download the dataset"""
27
26
  self.__datamaestro_dataset__.download()
28
27
 
29
- @abstractmethod
30
28
  def prepare(self, *args, **kwargs):
31
29
  """Prepare the dataset"""
32
30
  self.__datamaestro_dataset__.prepare()
@@ -204,6 +204,14 @@ class AbstractDataset(AbstractData):
204
204
  from datamaestro.data import Base
205
205
 
206
206
  if isinstance(data, Base):
207
+ try:
208
+ if data.id:
209
+ # There is already an ID, skip this
210
+ # and the descendants
211
+ return
212
+ except KeyError:
213
+ pass
214
+
207
215
  if self.repository is None:
208
216
  data.id = id
209
217
  else:
@@ -272,7 +280,7 @@ class DatasetWrapper(AbstractDataset):
272
280
  annotations (otherwise, derive from `AbstractDataset`).
273
281
  """
274
282
 
275
- def __init__(self, annotation, t: type):
283
+ def __init__(self, annotation: "dataset", t: type):
276
284
  self.config = None
277
285
  self.repository: Optional[Repository] = None
278
286
  self.t = t
@@ -290,6 +298,7 @@ class DatasetWrapper(AbstractDataset):
290
298
  # Set some variables
291
299
  self.url = annotation.url
292
300
  self.doi = annotation.doi
301
+ self.as_prepare = annotation.as_prepare
293
302
 
294
303
  # Builds the ID:
295
304
  # Removes module_name.config prefix
@@ -384,9 +393,16 @@ class DatasetWrapper(AbstractDataset):
384
393
 
385
394
  else:
386
395
  # Construct the object
387
- resources = {key: value.prepare() for key, value in self.resources.items()}
396
+ if self.as_prepare:
397
+ result = self.t(self, None)
398
+ else:
399
+ resources = {
400
+ key: value.prepare() for key, value in self.resources.items()
401
+ }
402
+ result = self.t(**resources)
388
403
 
389
- result = self.t.C(**resources)
404
+ if result is None:
405
+ raise RuntimeError(f"{self.base} did not return any resource")
390
406
 
391
407
  # Download resources
392
408
  logging.debug(
@@ -408,6 +424,8 @@ class DatasetWrapper(AbstractDataset):
408
424
  elif isinstance(result, self.base):
409
425
  self.config = result
410
426
  else:
427
+ name = self.t.__name__
428
+ filename = inspect.getfile(self.t)
411
429
  raise RuntimeError(
412
430
  f"The dataset method {name} defined in "
413
431
  f"{filename} returned an object of type {type(dict)}"
@@ -563,6 +581,7 @@ class dataset:
563
581
  :param url: The URL associated with the dataset.
564
582
  :param size: The size of the dataset (should be a parsable format).
565
583
  :param doi: The DOI of the corresponding paper.
584
+ :param as_prepare: Resources are set up within the method itself
566
585
  """
567
586
 
568
587
  def __init__(
@@ -574,6 +593,7 @@ class dataset:
574
593
  url: None | str = None,
575
594
  size: None | int | str = None,
576
595
  doi: None | str = None,
596
+ as_prepare: bool = False,
577
597
  ):
578
598
  if hasattr(base, "__datamaestro__") and isinstance(
579
599
  base.__datamaestro__, metadataset
@@ -588,6 +608,7 @@ class dataset:
588
608
  self.timestamp = timestamp
589
609
  self.size = size
590
610
  self.doi = doi
611
+ self.as_prepare = as_prepare
591
612
 
592
613
  def __call__(self, t):
593
614
  from datamaestro.data import Base
datamaestro/version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '1.4.5'
21
- __version_tuple__ = version_tuple = (1, 4, 5)
20
+ __version__ = version = '1.5.1'
21
+ __version_tuple__ = version_tuple = (1, 5, 1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro
3
- Version: 1.4.5
3
+ Version: 1.5.1
4
4
  Summary: "Dataset management command line and API"
5
5
  Home-page: https://github.com/experimaestro/datamaestro
6
6
  Author: Benjamin Piwowarski
@@ -1,20 +1,20 @@
1
1
  datamaestro/__init__.py,sha256=LR8nx7H3Fo97O0gJXV2PxQezsmSTDLAg_nQEXB5QAjc,322
2
2
  datamaestro/__main__.py,sha256=2p36ZcJcZAL9NZBUkMaYRUhKyqhheVPXMGw6K1KNwhk,9196
3
3
  datamaestro/context.py,sha256=KsXYNTt4xX4zEVrnd2hciP7PVCh1StRzjU1Ih6VeCtU,13532
4
- datamaestro/definitions.py,sha256=pmHG-2UGMyzVwkfYAbrOi9TCTIAaxae3BhX3_i19jLY,18948
4
+ datamaestro/definitions.py,sha256=FiM_WZhF91f1H2c8ZardEV9jw5R-4zlkFWmzuTGQxos,19705
5
5
  datamaestro/record.py,sha256=IxxcrSIf99iluohtpnuMBTFkqeHRe5S-T_hWEqBgeME,5812
6
6
  datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
7
7
  datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
8
8
  datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
9
9
  datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
10
10
  datamaestro/utils.py,sha256=9m-AVVww6InAZfGFiGy6XJzfExpYNqH1fhWQEezjafA,6536
11
- datamaestro/version.py,sha256=Bgotoqdds4WNXiIfH89xsAubOtgFP60x-MwQ36sheLU,511
11
+ datamaestro/version.py,sha256=FasBapRYrTzegTWgTLGzFg84_c9eAAt8ZkOEeQxlLC4,511
12
12
  datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
13
13
  datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
14
14
  datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
16
16
  datamaestro/commands/site.py,sha256=7H9c-ZlXt7bUlldHn8fMebzDKS7B7ijPNKrxHXMG-Lk,14233
17
- datamaestro/data/__init__.py,sha256=Z1qZnliJwS5sRaLznK5YBVJCjvAlPbmJjbRvvLv_UVI,1547
17
+ datamaestro/data/__init__.py,sha256=s81ZxT8MQrBGkcu45xr4NaInIsMeunHOLnkLrJE47So,1496
18
18
  datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
19
19
  datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
20
20
  datamaestro/data/ml.py,sha256=7Rv4Tb9g17HDj8mOBJpIDjgolGQAd5Wrb0mHlnm-bPE,709
@@ -40,9 +40,9 @@ datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,
40
40
  datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
41
41
  datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
42
42
  datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
43
- datamaestro-1.4.5.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
44
- datamaestro-1.4.5.dist-info/METADATA,sha256=EbpwoGzR85Ex5RrdmEE1C-AsL9hbrYGNtAl8nIKgJQE,8191
45
- datamaestro-1.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
- datamaestro-1.4.5.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
47
- datamaestro-1.4.5.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
48
- datamaestro-1.4.5.dist-info/RECORD,,
43
+ datamaestro-1.5.1.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
44
+ datamaestro-1.5.1.dist-info/METADATA,sha256=r0FpdXB_X6gXcy3hdBDq06N1lP-6DN-RQ_65tYjRixc,8191
45
+ datamaestro-1.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
+ datamaestro-1.5.1.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
47
+ datamaestro-1.5.1.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
48
+ datamaestro-1.5.1.dist-info/RECORD,,