datamaestro 1.7.4__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamaestro/__init__.py +1 -1
- datamaestro/context.py +2 -2
- datamaestro/definitions.py +69 -7
- datamaestro/version.py +1 -1
- {datamaestro-1.7.4.dist-info → datamaestro-1.8.0.dist-info}/METADATA +9 -10
- {datamaestro-1.7.4.dist-info → datamaestro-1.8.0.dist-info}/RECORD +9 -9
- {datamaestro-1.7.4.dist-info → datamaestro-1.8.0.dist-info}/WHEEL +0 -0
- {datamaestro-1.7.4.dist-info → datamaestro-1.8.0.dist-info}/entry_points.txt +0 -0
- {datamaestro-1.7.4.dist-info → datamaestro-1.8.0.dist-info}/licenses/LICENSE +0 -0
datamaestro/__init__.py
CHANGED
datamaestro/context.py
CHANGED
|
@@ -282,7 +282,7 @@ class Datasets(Iterable["AbstractDataset"]):
|
|
|
282
282
|
self._description = "\n".join(description)
|
|
283
283
|
|
|
284
284
|
def __iter__(self) -> Iterable["AbstractDataset"]:
|
|
285
|
-
from .definitions import DatasetWrapper
|
|
285
|
+
from .definitions import DatasetWrapper, Dataset
|
|
286
286
|
from datamaestro.data import Base
|
|
287
287
|
|
|
288
288
|
# Iterates over defined symbols
|
|
@@ -294,7 +294,7 @@ class Datasets(Iterable["AbstractDataset"]):
|
|
|
294
294
|
yield value
|
|
295
295
|
elif (
|
|
296
296
|
inspect.isclass(value)
|
|
297
|
-
and issubclass(value, Base)
|
|
297
|
+
and (issubclass(value, Base) or issubclass(value, Dataset))
|
|
298
298
|
and hasattr(value, "__dataset__")
|
|
299
299
|
):
|
|
300
300
|
if self.module.__name__ == value.__module__:
|
datamaestro/definitions.py
CHANGED
|
@@ -546,10 +546,16 @@ class DatasetWrapper(AbstractDataset):
|
|
|
546
546
|
# There is nothing, use the full path
|
|
547
547
|
path = ".".join(components[1:])
|
|
548
548
|
else:
|
|
549
|
-
# Replace
|
|
549
|
+
# Replace the class name with the provided suffix
|
|
550
550
|
path = ".".join(components[1:-1])
|
|
551
551
|
if annotation.id != "":
|
|
552
|
-
|
|
552
|
+
# Strip leading dot if present (e.g., ".8.topics" -> "8.topics")
|
|
553
|
+
suffix = (
|
|
554
|
+
annotation.id[1:]
|
|
555
|
+
if annotation.id.startswith(".")
|
|
556
|
+
else annotation.id
|
|
557
|
+
)
|
|
558
|
+
path = f"{path}.{suffix}"
|
|
553
559
|
|
|
554
560
|
self.id = path
|
|
555
561
|
else:
|
|
@@ -611,8 +617,13 @@ class DatasetWrapper(AbstractDataset):
|
|
|
611
617
|
if self.config is not None:
|
|
612
618
|
return self.config
|
|
613
619
|
|
|
620
|
+
# Dataset subclass with config() method
|
|
621
|
+
if inspect.isclass(self.t) and issubclass(self.t, Dataset):
|
|
622
|
+
instance = self.t()
|
|
623
|
+
self.config = instance.config()
|
|
624
|
+
|
|
614
625
|
# Direct creation of the dataset
|
|
615
|
-
|
|
626
|
+
elif self.base is self.t:
|
|
616
627
|
self.config = self.base.__create_dataset__(self)
|
|
617
628
|
|
|
618
629
|
elif hasattr(self.t, "__create_dataset__"):
|
|
@@ -719,6 +730,9 @@ class DataAnnotation:
|
|
|
719
730
|
else:
|
|
720
731
|
if "__datamaestro__" in object.__dict__:
|
|
721
732
|
self.annotate(object.__datamaestro__)
|
|
733
|
+
elif "__dataset__" in object.__dict__:
|
|
734
|
+
# Dataset subclass decorated with @dataset
|
|
735
|
+
self.annotate(object.__dataset__)
|
|
722
736
|
else:
|
|
723
737
|
# With configuration objects, add a __datamaestro__ member to the class
|
|
724
738
|
assert issubclass(object, Config), (
|
|
@@ -815,9 +829,10 @@ class dataset:
|
|
|
815
829
|
|
|
816
830
|
:param base: The base type (or None if inferred from type annotation).
|
|
817
831
|
:param timestamp: If the dataset evolves, specify its timestamp.
|
|
818
|
-
:param id:
|
|
819
|
-
|
|
820
|
-
|
|
832
|
+
:param id: Dataset ID override. Behavior depends on format:
|
|
833
|
+
- Full ID (e.g., "com.example.data"): used as-is if it has 3+ components
|
|
834
|
+
- Suffix with dot prefix (e.g., ".8.topics"): appended to module path
|
|
835
|
+
- Single component (e.g., "mnist"): replaces the class name in the path
|
|
821
836
|
:param url: The URL associated with the dataset.
|
|
822
837
|
:param size: The size of the dataset (should be a parsable format).
|
|
823
838
|
:param doi: The DOI of the corresponding paper.
|
|
@@ -857,6 +872,17 @@ class dataset:
|
|
|
857
872
|
if self.base is None:
|
|
858
873
|
if inspect.isclass(t) and issubclass(t, Base):
|
|
859
874
|
self.base = t
|
|
875
|
+
elif inspect.isclass(t) and issubclass(t, Dataset):
|
|
876
|
+
# Infer base from config() return annotation
|
|
877
|
+
try:
|
|
878
|
+
config_method = t.config
|
|
879
|
+
return_type = config_method.__annotations__["return"]
|
|
880
|
+
if isinstance(return_type, _GenericAlias):
|
|
881
|
+
return_type = return_type.__origin__
|
|
882
|
+
self.base = return_type
|
|
883
|
+
except (KeyError, AttributeError):
|
|
884
|
+
logging.warning("No return annotation on config() in %s", t)
|
|
885
|
+
raise
|
|
860
886
|
else:
|
|
861
887
|
try:
|
|
862
888
|
# Get type from return annotation
|
|
@@ -875,12 +901,48 @@ class dataset:
|
|
|
875
901
|
t.__dataset__ = dw
|
|
876
902
|
|
|
877
903
|
# For class-based datasets, scan for Resource class attributes
|
|
878
|
-
if inspect.isclass(t) and issubclass(t, Base):
|
|
904
|
+
if inspect.isclass(t) and (issubclass(t, Base) or issubclass(t, Dataset)):
|
|
879
905
|
_bind_class_resources(t, dw)
|
|
880
906
|
return t
|
|
881
907
|
return dw
|
|
882
908
|
|
|
883
909
|
|
|
910
|
+
class Dataset(ABC):
|
|
911
|
+
"""Base class for simplified dataset definitions.
|
|
912
|
+
|
|
913
|
+
Inherit from this class and use the ``@dataset`` decorator.
|
|
914
|
+
Resources are defined as class attributes and accessed via ``self``.
|
|
915
|
+
|
|
916
|
+
Example::
|
|
917
|
+
|
|
918
|
+
@dataset(url="http://yann.lecun.com/exdb/mnist/")
|
|
919
|
+
class MNIST(Dataset):
|
|
920
|
+
\"\"\"The MNIST database of handwritten digits.\"\"\"
|
|
921
|
+
|
|
922
|
+
TRAIN_IMAGES = FileDownloader("train.idx", "http://...")
|
|
923
|
+
TEST_IMAGES = FileDownloader("test.idx", "http://...")
|
|
924
|
+
|
|
925
|
+
def config(self) -> ImageClassification:
|
|
926
|
+
return ImageClassification.C(
|
|
927
|
+
train=IDX(path=self.TRAIN_IMAGES.path),
|
|
928
|
+
test=IDX(path=self.TEST_IMAGES.path),
|
|
929
|
+
)
|
|
930
|
+
"""
|
|
931
|
+
|
|
932
|
+
@abstractmethod
|
|
933
|
+
def config(self) -> "Base":
|
|
934
|
+
"""Create and return the dataset configuration.
|
|
935
|
+
|
|
936
|
+
Override this method to construct and return the data object.
|
|
937
|
+
Resources are accessible via ``self.RESOURCE_NAME.path`` or
|
|
938
|
+
``self.RESOURCE_NAME.prepare()``.
|
|
939
|
+
|
|
940
|
+
Returns:
|
|
941
|
+
A Config instance (typically created via ``SomeType.C(...)``).
|
|
942
|
+
"""
|
|
943
|
+
...
|
|
944
|
+
|
|
945
|
+
|
|
884
946
|
class metadataset(AbstractDataset):
|
|
885
947
|
"""Annotation for object/functions which are abstract dataset definitions
|
|
886
948
|
|
datamaestro/version.py
CHANGED
{datamaestro-1.7.4.dist-info → datamaestro-1.8.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datamaestro
|
|
3
|
-
Version: 1.7.4
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: Add your description here
|
|
5
5
|
Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -141,11 +141,11 @@ and handles downloads with two-path safety and state tracking.
|
|
|
141
141
|
from datamaestro_image.data import ImageClassification, LabelledImages
|
|
142
142
|
from datamaestro.data.tensor import IDX
|
|
143
143
|
from datamaestro.download.single import FileDownloader
|
|
144
|
-
from datamaestro.definitions import
|
|
144
|
+
from datamaestro.definitions import Dataset, dataset
|
|
145
145
|
|
|
146
146
|
|
|
147
147
|
@dataset(url="http://yann.lecun.com/exdb/mnist/")
|
|
148
|
-
class MNIST(ImageClassification):
|
|
148
|
+
class MNIST(Dataset):
|
|
149
149
|
"""The MNIST database of handwritten digits."""
|
|
150
150
|
|
|
151
151
|
TRAIN_IMAGES = FileDownloader(
|
|
@@ -165,16 +165,15 @@ class MNIST(ImageClassification):
|
|
|
165
165
|
"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
|
|
166
166
|
)
|
|
167
167
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
return cls.C(
|
|
168
|
+
def config(self) -> ImageClassification:
|
|
169
|
+
return ImageClassification.C(
|
|
171
170
|
train=LabelledImages(
|
|
172
|
-
images=IDX(path=
|
|
173
|
-
labels=IDX(path=
|
|
171
|
+
images=IDX(path=self.TRAIN_IMAGES.path),
|
|
172
|
+
labels=IDX(path=self.TRAIN_LABELS.path),
|
|
174
173
|
),
|
|
175
174
|
test=LabelledImages(
|
|
176
|
-
images=IDX(path=
|
|
177
|
-
labels=IDX(path=
|
|
175
|
+
images=IDX(path=self.TEST_IMAGES.path),
|
|
176
|
+
labels=IDX(path=self.TEST_LABELS.path),
|
|
178
177
|
),
|
|
179
178
|
)
|
|
180
179
|
```
|
{datamaestro-1.7.4.dist-info → datamaestro-1.8.0.dist-info}/RECORD
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
datamaestro/__init__.py,sha256=
|
|
1
|
+
datamaestro/__init__.py,sha256=ZquS8JzBV8aUYjiU758IIdoepk3o2uAqQqI1KRgwQ7Q,255
|
|
2
2
|
datamaestro/__main__.py,sha256=22v54rQoO2umL1frFO2FOQuuRljr-Jw-ER-OATTpVxw,9218
|
|
3
|
-
datamaestro/context.py,sha256=
|
|
4
|
-
datamaestro/definitions.py,sha256=
|
|
3
|
+
datamaestro/context.py,sha256=KFStZf4z1eJT4A47uvDalTTHkBgv5l4KXHDY-amqKf0,14153
|
|
4
|
+
datamaestro/definitions.py,sha256=DvinFtwPN_V7oaD3SILOnYTT_VKyrvDRupJPXT9qOGQ,30611
|
|
5
5
|
datamaestro/record.py,sha256=e5fjRV3ni7ZxXwYH45bVDB_jpD-n9quvh4ie4uI-MM4,7140
|
|
6
6
|
datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
|
|
7
7
|
datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
|
|
@@ -9,7 +9,7 @@ datamaestro/settings.py,sha256=NuUbe_C31GDlzdio2ryz7tPzuo4hsmmdCM5Cyuhqbzs,1294
|
|
|
9
9
|
datamaestro/sphinx.py,sha256=WWXB63gd0ZgEwFr_YwO2Hmuly5OoiFlu9mDvJSHFYuY,6966
|
|
10
10
|
datamaestro/utils.py,sha256=JUrvtVYnjNKRo0_ZypmXSQ9R4uOyImDjW1GZ14MYzKM,6547
|
|
11
11
|
datamaestro/v2.md,sha256=pLCxQUdfVkd4CM9Ie0ZxCnxUntqoA7k_0m7x1etcr7Y,9801
|
|
12
|
-
datamaestro/version.py,sha256=
|
|
12
|
+
datamaestro/version.py,sha256=7vKzAvyE5qa683r17tWnXJ25jKqnqX-fQ76AkxrXL_E,171
|
|
13
13
|
datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
|
|
14
14
|
datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
|
|
15
15
|
datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -42,8 +42,8 @@ datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_Eg
|
|
|
42
42
|
datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
|
|
43
43
|
datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
|
|
44
44
|
datamaestro/test/test_resource.py,sha256=QbwmZkGv_8O_jI0CKcatJSUs3IKbMfBrk0T_aTC1KcE,51124
|
|
45
|
-
datamaestro-1.
|
|
46
|
-
datamaestro-1.
|
|
47
|
-
datamaestro-1.
|
|
48
|
-
datamaestro-1.
|
|
49
|
-
datamaestro-1.
|
|
45
|
+
datamaestro-1.8.0.dist-info/METADATA,sha256=mHgkI5X1um_WLTvCyjsFH3HTv_O-5SdkGuPysfCMsrg,7402
|
|
46
|
+
datamaestro-1.8.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
47
|
+
datamaestro-1.8.0.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
|
|
48
|
+
datamaestro-1.8.0.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
|
|
49
|
+
datamaestro-1.8.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|