datamaestro 1.4.5__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamaestro/data/__init__.py +0 -2
- datamaestro/definitions.py +24 -3
- datamaestro/version.py +2 -2
- {datamaestro-1.4.5.dist-info → datamaestro-1.5.1.dist-info}/METADATA +1 -1
- {datamaestro-1.4.5.dist-info → datamaestro-1.5.1.dist-info}/RECORD +9 -9
- {datamaestro-1.4.5.dist-info → datamaestro-1.5.1.dist-info}/WHEEL +0 -0
- {datamaestro-1.4.5.dist-info → datamaestro-1.5.1.dist-info}/entry_points.txt +0 -0
- {datamaestro-1.4.5.dist-info → datamaestro-1.5.1.dist-info}/licenses/LICENSE +0 -0
- {datamaestro-1.4.5.dist-info → datamaestro-1.5.1.dist-info}/top_level.txt +0 -0
datamaestro/data/__init__.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from abc import abstractmethod
|
|
2
1
|
import logging
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
from typing import Any, Dict
|
|
@@ -26,7 +25,6 @@ class Base(Config):
|
|
|
26
25
|
"""Download the dataset"""
|
|
27
26
|
self.__datamaestro_dataset__.download()
|
|
28
27
|
|
|
29
|
-
@abstractmethod
|
|
30
28
|
def prepare(self, *args, **kwargs):
|
|
31
29
|
"""Prepare the dataset"""
|
|
32
30
|
self.__datamaestro_dataset__.prepare()
|
datamaestro/definitions.py
CHANGED
|
@@ -204,6 +204,14 @@ class AbstractDataset(AbstractData):
|
|
|
204
204
|
from datamaestro.data import Base
|
|
205
205
|
|
|
206
206
|
if isinstance(data, Base):
|
|
207
|
+
try:
|
|
208
|
+
if data.id:
|
|
209
|
+
# There is already an ID, skip this
|
|
210
|
+
# and the descendants
|
|
211
|
+
return
|
|
212
|
+
except KeyError:
|
|
213
|
+
pass
|
|
214
|
+
|
|
207
215
|
if self.repository is None:
|
|
208
216
|
data.id = id
|
|
209
217
|
else:
|
|
@@ -272,7 +280,7 @@ class DatasetWrapper(AbstractDataset):
|
|
|
272
280
|
annotations (otherwise, derive from `AbstractDataset`).
|
|
273
281
|
"""
|
|
274
282
|
|
|
275
|
-
def __init__(self, annotation, t: type):
|
|
283
|
+
def __init__(self, annotation: "dataset", t: type):
|
|
276
284
|
self.config = None
|
|
277
285
|
self.repository: Optional[Repository] = None
|
|
278
286
|
self.t = t
|
|
@@ -290,6 +298,7 @@ class DatasetWrapper(AbstractDataset):
|
|
|
290
298
|
# Set some variables
|
|
291
299
|
self.url = annotation.url
|
|
292
300
|
self.doi = annotation.doi
|
|
301
|
+
self.as_prepare = annotation.as_prepare
|
|
293
302
|
|
|
294
303
|
# Builds the ID:
|
|
295
304
|
# Removes module_name.config prefix
|
|
@@ -384,9 +393,16 @@ class DatasetWrapper(AbstractDataset):
|
|
|
384
393
|
|
|
385
394
|
else:
|
|
386
395
|
# Construct the object
|
|
387
|
-
|
|
396
|
+
if self.as_prepare:
|
|
397
|
+
result = self.t(self, None)
|
|
398
|
+
else:
|
|
399
|
+
resources = {
|
|
400
|
+
key: value.prepare() for key, value in self.resources.items()
|
|
401
|
+
}
|
|
402
|
+
result = self.t(**resources)
|
|
388
403
|
|
|
389
|
-
result
|
|
404
|
+
if result is None:
|
|
405
|
+
raise RuntimeError(f"{self.base} did not return any resource")
|
|
390
406
|
|
|
391
407
|
# Download resources
|
|
392
408
|
logging.debug(
|
|
@@ -408,6 +424,8 @@ class DatasetWrapper(AbstractDataset):
|
|
|
408
424
|
elif isinstance(result, self.base):
|
|
409
425
|
self.config = result
|
|
410
426
|
else:
|
|
427
|
+
name = self.t.__name__
|
|
428
|
+
filename = inspect.getfile(self.t)
|
|
411
429
|
raise RuntimeError(
|
|
412
430
|
f"The dataset method {name} defined in "
|
|
413
431
|
f"{filename} returned an object of type {type(dict)}"
|
|
@@ -563,6 +581,7 @@ class dataset:
|
|
|
563
581
|
:param url: The URL associated with the dataset.
|
|
564
582
|
:param size: The size of the dataset (should be a parsable format).
|
|
565
583
|
:param doi: The DOI of the corresponding paper.
|
|
584
|
+
:param as_prepare: Resources are setup within the method itself
|
|
566
585
|
"""
|
|
567
586
|
|
|
568
587
|
def __init__(
|
|
@@ -574,6 +593,7 @@ class dataset:
|
|
|
574
593
|
url: None | str = None,
|
|
575
594
|
size: None | int | str = None,
|
|
576
595
|
doi: None | str = None,
|
|
596
|
+
as_prepare: bool = False,
|
|
577
597
|
):
|
|
578
598
|
if hasattr(base, "__datamaestro__") and isinstance(
|
|
579
599
|
base.__datamaestro__, metadataset
|
|
@@ -588,6 +608,7 @@ class dataset:
|
|
|
588
608
|
self.timestamp = timestamp
|
|
589
609
|
self.size = size
|
|
590
610
|
self.doi = doi
|
|
611
|
+
self.as_prepare = as_prepare
|
|
591
612
|
|
|
592
613
|
def __call__(self, t):
|
|
593
614
|
from datamaestro.data import Base
|
datamaestro/version.py
CHANGED
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
datamaestro/__init__.py,sha256=LR8nx7H3Fo97O0gJXV2PxQezsmSTDLAg_nQEXB5QAjc,322
|
|
2
2
|
datamaestro/__main__.py,sha256=2p36ZcJcZAL9NZBUkMaYRUhKyqhheVPXMGw6K1KNwhk,9196
|
|
3
3
|
datamaestro/context.py,sha256=KsXYNTt4xX4zEVrnd2hciP7PVCh1StRzjU1Ih6VeCtU,13532
|
|
4
|
-
datamaestro/definitions.py,sha256=
|
|
4
|
+
datamaestro/definitions.py,sha256=FiM_WZhF91f1H2c8ZardEV9jw5R-4zlkFWmzuTGQxos,19705
|
|
5
5
|
datamaestro/record.py,sha256=IxxcrSIf99iluohtpnuMBTFkqeHRe5S-T_hWEqBgeME,5812
|
|
6
6
|
datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
|
|
7
7
|
datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
|
|
8
8
|
datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
|
|
9
9
|
datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
|
|
10
10
|
datamaestro/utils.py,sha256=9m-AVVww6InAZfGFiGy6XJzfExpYNqH1fhWQEezjafA,6536
|
|
11
|
-
datamaestro/version.py,sha256=
|
|
11
|
+
datamaestro/version.py,sha256=FasBapRYrTzegTWgTLGzFg84_c9eAAt8ZkOEeQxlLC4,511
|
|
12
12
|
datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
|
|
13
13
|
datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
|
|
14
14
|
datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
|
|
16
16
|
datamaestro/commands/site.py,sha256=7H9c-ZlXt7bUlldHn8fMebzDKS7B7ijPNKrxHXMG-Lk,14233
|
|
17
|
-
datamaestro/data/__init__.py,sha256=
|
|
17
|
+
datamaestro/data/__init__.py,sha256=s81ZxT8MQrBGkcu45xr4NaInIsMeunHOLnkLrJE47So,1496
|
|
18
18
|
datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
|
|
19
19
|
datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
|
|
20
20
|
datamaestro/data/ml.py,sha256=7Rv4Tb9g17HDj8mOBJpIDjgolGQAd5Wrb0mHlnm-bPE,709
|
|
@@ -40,9 +40,9 @@ datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,
|
|
|
40
40
|
datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
|
|
41
41
|
datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
|
|
42
42
|
datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
|
|
43
|
-
datamaestro-1.
|
|
44
|
-
datamaestro-1.
|
|
45
|
-
datamaestro-1.
|
|
46
|
-
datamaestro-1.
|
|
47
|
-
datamaestro-1.
|
|
48
|
-
datamaestro-1.
|
|
43
|
+
datamaestro-1.5.1.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
|
|
44
|
+
datamaestro-1.5.1.dist-info/METADATA,sha256=r0FpdXB_X6gXcy3hdBDq06N1lP-6DN-RQ_65tYjRixc,8191
|
|
45
|
+
datamaestro-1.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
46
|
+
datamaestro-1.5.1.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
|
|
47
|
+
datamaestro-1.5.1.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
|
|
48
|
+
datamaestro-1.5.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|