datamaestro 1.7.4__tar.gz → 1.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamaestro-1.7.4 → datamaestro-1.8.0}/PKG-INFO +9 -10
- {datamaestro-1.7.4 → datamaestro-1.8.0}/README.md +8 -9
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/datasets.rst +16 -13
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/index.md +8 -9
- datamaestro-1.8.0/release-notes.md +5 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/__init__.py +1 -1
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/context.py +2 -2
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/definitions.py +69 -7
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/version.py +1 -1
- datamaestro-1.7.4/release-notes.md +0 -5
- {datamaestro-1.7.4 → datamaestro-1.8.0}/.coverage +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/.flake8 +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/.github/workflows/pytest.yml +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/.github/workflows/python-publish.yml +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/.gitignore +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/.pre-commit-config.yaml +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/.python-version +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/.readthedocs.yml +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/CHANGELOG.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/LICENSE +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/MANIFEST.in +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/TODO.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/cliff.toml +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/Makefile +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/make.bat +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/requirements.txt +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/api/data.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/api/download.rst +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/api/index.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/api/records.rst +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/cli.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/conf.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/configuration.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/developping.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/getting-started.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/docs/source/style.css +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/pyproject.toml +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/pytest.ini +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/requirements-dev.txt +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/requirements.txt +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/schema.yaml +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/__main__.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/annotations/__init__.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/annotations/agreement.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/commands/__init__.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/commands/mainstyle.css +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/commands/site.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/data/__init__.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/data/csv.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/data/huggingface.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/data/ml.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/data/tensor.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/__init__.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/archive.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/custom.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/huggingface.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/links.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/manual.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/multiple.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/single.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/sync.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/todo.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/download/wayback.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/record.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/registry.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/search.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/settings.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/sphinx.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/stream/__init__.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/stream/compress.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/stream/lines.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/templates/dataset.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/test/__init__.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/test/checks.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/test/conftest.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/test/test_annotations.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/test/test_download_handlers.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/test/test_record.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/test/test_resource.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/utils.py +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/src/datamaestro/v2.md +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/tox.ini +0 -0
- {datamaestro-1.7.4 → datamaestro-1.8.0}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datamaestro
|
|
3
|
-
Version: 1.7.4
|
|
3
|
+
Version: 1.8.0
|
|
4
4
|
Summary: Add your description here
|
|
5
5
|
Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -141,11 +141,11 @@ and handles downloads with two-path safety and state tracking.
|
|
|
141
141
|
from datamaestro_image.data import ImageClassification, LabelledImages
|
|
142
142
|
from datamaestro.data.tensor import IDX
|
|
143
143
|
from datamaestro.download.single import FileDownloader
|
|
144
|
-
from datamaestro.definitions import
|
|
144
|
+
from datamaestro.definitions import Dataset, dataset
|
|
145
145
|
|
|
146
146
|
|
|
147
147
|
@dataset(url="http://yann.lecun.com/exdb/mnist/")
|
|
148
|
-
class MNIST(ImageClassification):
|
|
148
|
+
class MNIST(Dataset):
|
|
149
149
|
"""The MNIST database of handwritten digits."""
|
|
150
150
|
|
|
151
151
|
TRAIN_IMAGES = FileDownloader(
|
|
@@ -165,16 +165,15 @@ class MNIST(ImageClassification):
|
|
|
165
165
|
"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
|
|
166
166
|
)
|
|
167
167
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
return cls.C(
|
|
168
|
+
def config(self) -> ImageClassification:
|
|
169
|
+
return ImageClassification.C(
|
|
171
170
|
train=LabelledImages(
|
|
172
|
-
images=IDX(path=
|
|
173
|
-
labels=IDX(path=
|
|
171
|
+
images=IDX(path=self.TRAIN_IMAGES.path),
|
|
172
|
+
labels=IDX(path=self.TRAIN_LABELS.path),
|
|
174
173
|
),
|
|
175
174
|
test=LabelledImages(
|
|
176
|
-
images=IDX(path=
|
|
177
|
-
labels=IDX(path=
|
|
175
|
+
images=IDX(path=self.TEST_IMAGES.path),
|
|
176
|
+
labels=IDX(path=self.TEST_LABELS.path),
|
|
178
177
|
),
|
|
179
178
|
)
|
|
180
179
|
```
|
|
@@ -106,11 +106,11 @@ and handles downloads with two-path safety and state tracking.
|
|
|
106
106
|
from datamaestro_image.data import ImageClassification, LabelledImages
|
|
107
107
|
from datamaestro.data.tensor import IDX
|
|
108
108
|
from datamaestro.download.single import FileDownloader
|
|
109
|
-
from datamaestro.definitions import
|
|
109
|
+
from datamaestro.definitions import Dataset, dataset
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
@dataset(url="http://yann.lecun.com/exdb/mnist/")
|
|
113
|
-
class MNIST(ImageClassification):
|
|
113
|
+
class MNIST(Dataset):
|
|
114
114
|
"""The MNIST database of handwritten digits."""
|
|
115
115
|
|
|
116
116
|
TRAIN_IMAGES = FileDownloader(
|
|
@@ -130,16 +130,15 @@ class MNIST(ImageClassification):
|
|
|
130
130
|
"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
|
|
131
131
|
)
|
|
132
132
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
return cls.C(
|
|
133
|
+
def config(self) -> ImageClassification:
|
|
134
|
+
return ImageClassification.C(
|
|
136
135
|
train=LabelledImages(
|
|
137
|
-
images=IDX(path=
|
|
138
|
-
labels=IDX(path=
|
|
136
|
+
images=IDX(path=self.TRAIN_IMAGES.path),
|
|
137
|
+
labels=IDX(path=self.TRAIN_LABELS.path),
|
|
139
138
|
),
|
|
140
139
|
test=LabelledImages(
|
|
141
|
-
images=IDX(path=
|
|
142
|
-
labels=IDX(path=
|
|
140
|
+
images=IDX(path=self.TEST_IMAGES.path),
|
|
141
|
+
labels=IDX(path=self.TEST_LABELS.path),
|
|
143
142
|
),
|
|
144
143
|
)
|
|
145
144
|
```
|
|
@@ -30,11 +30,10 @@ Basic Example
|
|
|
30
30
|
from datamaestro_image.data import ImageClassification, LabelledImages
|
|
31
31
|
from datamaestro.data.tensor import IDX
|
|
32
32
|
from datamaestro.download.single import FileDownloader
|
|
33
|
-
from datamaestro.definitions import
|
|
34
|
-
|
|
33
|
+
from datamaestro.definitions import Dataset, dataset
|
|
35
34
|
|
|
36
35
|
@dataset(url="http://yann.lecun.com/exdb/mnist/")
|
|
37
|
-
class MNIST(
|
|
36
|
+
class MNIST(Dataset):
|
|
38
37
|
"""The MNIST database of handwritten digits."""
|
|
39
38
|
|
|
40
39
|
TRAIN_IMAGES = FileDownloader(
|
|
@@ -54,16 +53,15 @@ Basic Example
|
|
|
54
53
|
"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
|
|
55
54
|
)
|
|
56
55
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
return cls.C(
|
|
56
|
+
def config(self) -> ImageClassification:
|
|
57
|
+
return ImageClassification.C(
|
|
60
58
|
train=LabelledImages(
|
|
61
|
-
images=IDX(path=
|
|
62
|
-
labels=IDX(path=
|
|
59
|
+
images=IDX(path=self.TRAIN_IMAGES.path),
|
|
60
|
+
labels=IDX(path=self.TRAIN_LABELS.path),
|
|
63
61
|
),
|
|
64
62
|
test=LabelledImages(
|
|
65
|
-
images=IDX(path=
|
|
66
|
-
labels=IDX(path=
|
|
63
|
+
images=IDX(path=self.TEST_IMAGES.path),
|
|
64
|
+
labels=IDX(path=self.TEST_LABELS.path),
|
|
67
65
|
),
|
|
68
66
|
)
|
|
69
67
|
|
|
@@ -132,7 +130,7 @@ Parameters
|
|
|
132
130
|
* - ``base``
|
|
133
131
|
- The base data type class (e.g., ``ImageClassification``). Can be inferred from the class hierarchy.
|
|
134
132
|
* - ``id``
|
|
135
|
-
- Override the automatic ID. Use ``"."`` prefix to
|
|
133
|
+
- Override the automatic ID. Use a ``"."`` prefix to append to the module path (e.g., ``".8.topics"`` becomes ``module.path.8.topics``).
|
|
136
134
|
* - ``url``
|
|
137
135
|
- URL to the dataset's homepage.
|
|
138
136
|
* - ``doi``
|
|
@@ -141,7 +139,7 @@ Parameters
|
|
|
141
139
|
- Version timestamp for evolving datasets.
|
|
142
140
|
* - ``size``
|
|
143
141
|
- Dataset size (for documentation).
|
|
144
|
-
* - ``as_prepare``
|
|
142
|
+
* - ``as_prepare`` (deprecated)
|
|
145
143
|
- If True, the function receives the dataset object for manual resource handling.
|
|
146
144
|
|
|
147
145
|
ID Override Examples
|
|
@@ -154,7 +152,12 @@ ID Override Examples
|
|
|
154
152
|
class IgnoredName(MyType):
|
|
155
153
|
...
|
|
156
154
|
|
|
157
|
-
#
|
|
155
|
+
# Append suffix to module path (in module gov.nist.trec.adhoc)
|
|
156
|
+
@dataset(MyType, id=".8.topics") # Results in gov.nist.trec.adhoc.8.topics
|
|
157
|
+
class Trec8Topics(MyType):
|
|
158
|
+
...
|
|
159
|
+
|
|
160
|
+
# Single component suffix (in module com.example)
|
|
158
161
|
@dataset(MyType, id=".v2") # Results in com.example.v2
|
|
159
162
|
class Original(MyType):
|
|
160
163
|
...
|
|
@@ -111,11 +111,11 @@ Datasets are defined as Python classes with resource attributes that describe ho
|
|
|
111
111
|
from datamaestro_image.data import ImageClassification, LabelledImages
|
|
112
112
|
from datamaestro.data.tensor import IDX
|
|
113
113
|
from datamaestro.download.single import FileDownloader
|
|
114
|
-
from datamaestro.definitions import
|
|
114
|
+
from datamaestro.definitions import Dataset, dataset
|
|
115
115
|
|
|
116
116
|
|
|
117
117
|
@dataset(url="http://yann.lecun.com/exdb/mnist/")
|
|
118
|
-
class MNIST(ImageClassification):
|
|
118
|
+
class MNIST(Dataset):
|
|
119
119
|
"""The MNIST database of handwritten digits."""
|
|
120
120
|
|
|
121
121
|
TRAIN_IMAGES = FileDownloader(
|
|
@@ -135,16 +135,15 @@ class MNIST(ImageClassification):
|
|
|
135
135
|
"http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
|
|
136
136
|
)
|
|
137
137
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
return cls.C(
|
|
138
|
+
def config(self) -> ImageClassification:
|
|
139
|
+
return ImageClassification.C(
|
|
141
140
|
train=LabelledImages(
|
|
142
|
-
images=IDX(path=
|
|
143
|
-
labels=IDX(path=
|
|
141
|
+
images=IDX(path=self.TRAIN_IMAGES.path),
|
|
142
|
+
labels=IDX(path=self.TRAIN_LABELS.path),
|
|
144
143
|
),
|
|
145
144
|
test=LabelledImages(
|
|
146
|
-
images=IDX(path=
|
|
147
|
-
labels=IDX(path=
|
|
145
|
+
images=IDX(path=self.TEST_IMAGES.path),
|
|
146
|
+
labels=IDX(path=self.TEST_LABELS.path),
|
|
148
147
|
),
|
|
149
148
|
)
|
|
150
149
|
```
|
|
@@ -282,7 +282,7 @@ class Datasets(Iterable["AbstractDataset"]):
|
|
|
282
282
|
self._description = "\n".join(description)
|
|
283
283
|
|
|
284
284
|
def __iter__(self) -> Iterable["AbstractDataset"]:
|
|
285
|
-
from .definitions import DatasetWrapper
|
|
285
|
+
from .definitions import DatasetWrapper, Dataset
|
|
286
286
|
from datamaestro.data import Base
|
|
287
287
|
|
|
288
288
|
# Iterates over defined symbols
|
|
@@ -294,7 +294,7 @@ class Datasets(Iterable["AbstractDataset"]):
|
|
|
294
294
|
yield value
|
|
295
295
|
elif (
|
|
296
296
|
inspect.isclass(value)
|
|
297
|
-
and issubclass(value, Base)
|
|
297
|
+
and (issubclass(value, Base) or issubclass(value, Dataset))
|
|
298
298
|
and hasattr(value, "__dataset__")
|
|
299
299
|
):
|
|
300
300
|
if self.module.__name__ == value.__module__:
|
|
@@ -546,10 +546,16 @@ class DatasetWrapper(AbstractDataset):
|
|
|
546
546
|
# There is nothing, use the full path
|
|
547
547
|
path = ".".join(components[1:])
|
|
548
548
|
else:
|
|
549
|
-
# Replace
|
|
549
|
+
# Replace the class name with the provided suffix
|
|
550
550
|
path = ".".join(components[1:-1])
|
|
551
551
|
if annotation.id != "":
|
|
552
|
-
|
|
552
|
+
# Strip leading dot if present (e.g., ".8.topics" -> "8.topics")
|
|
553
|
+
suffix = (
|
|
554
|
+
annotation.id[1:]
|
|
555
|
+
if annotation.id.startswith(".")
|
|
556
|
+
else annotation.id
|
|
557
|
+
)
|
|
558
|
+
path = f"{path}.{suffix}"
|
|
553
559
|
|
|
554
560
|
self.id = path
|
|
555
561
|
else:
|
|
@@ -611,8 +617,13 @@ class DatasetWrapper(AbstractDataset):
|
|
|
611
617
|
if self.config is not None:
|
|
612
618
|
return self.config
|
|
613
619
|
|
|
620
|
+
# Dataset subclass with config() method
|
|
621
|
+
if inspect.isclass(self.t) and issubclass(self.t, Dataset):
|
|
622
|
+
instance = self.t()
|
|
623
|
+
self.config = instance.config()
|
|
624
|
+
|
|
614
625
|
# Direct creation of the dataset
|
|
615
|
-
|
|
626
|
+
elif self.base is self.t:
|
|
616
627
|
self.config = self.base.__create_dataset__(self)
|
|
617
628
|
|
|
618
629
|
elif hasattr(self.t, "__create_dataset__"):
|
|
@@ -719,6 +730,9 @@ class DataAnnotation:
|
|
|
719
730
|
else:
|
|
720
731
|
if "__datamaestro__" in object.__dict__:
|
|
721
732
|
self.annotate(object.__datamaestro__)
|
|
733
|
+
elif "__dataset__" in object.__dict__:
|
|
734
|
+
# Dataset subclass decorated with @dataset
|
|
735
|
+
self.annotate(object.__dataset__)
|
|
722
736
|
else:
|
|
723
737
|
# With configuration objects, add a __datamaestro__ member to the class
|
|
724
738
|
assert issubclass(object, Config), (
|
|
@@ -815,9 +829,10 @@ class dataset:
|
|
|
815
829
|
|
|
816
830
|
:param base: The base type (or None if inferred from type annotation).
|
|
817
831
|
:param timestamp: If the dataset evolves, specify its timestamp.
|
|
818
|
-
:param id:
|
|
819
|
-
|
|
820
|
-
|
|
832
|
+
:param id: Dataset ID override. Behavior depends on format:
|
|
833
|
+
- Full ID (e.g., "com.example.data"): used as-is if it has 3+ components
|
|
834
|
+
- Suffix with dot prefix (e.g., ".8.topics"): appended to module path
|
|
835
|
+
- Single component (e.g., "mnist"): replaces the class name in the path
|
|
821
836
|
:param url: The URL associated with the dataset.
|
|
822
837
|
:param size: The size of the dataset (should be a parsable format).
|
|
823
838
|
:param doi: The DOI of the corresponding paper.
|
|
@@ -857,6 +872,17 @@ class dataset:
|
|
|
857
872
|
if self.base is None:
|
|
858
873
|
if inspect.isclass(t) and issubclass(t, Base):
|
|
859
874
|
self.base = t
|
|
875
|
+
elif inspect.isclass(t) and issubclass(t, Dataset):
|
|
876
|
+
# Infer base from config() return annotation
|
|
877
|
+
try:
|
|
878
|
+
config_method = t.config
|
|
879
|
+
return_type = config_method.__annotations__["return"]
|
|
880
|
+
if isinstance(return_type, _GenericAlias):
|
|
881
|
+
return_type = return_type.__origin__
|
|
882
|
+
self.base = return_type
|
|
883
|
+
except (KeyError, AttributeError):
|
|
884
|
+
logging.warning("No return annotation on config() in %s", t)
|
|
885
|
+
raise
|
|
860
886
|
else:
|
|
861
887
|
try:
|
|
862
888
|
# Get type from return annotation
|
|
@@ -875,12 +901,48 @@ class dataset:
|
|
|
875
901
|
t.__dataset__ = dw
|
|
876
902
|
|
|
877
903
|
# For class-based datasets, scan for Resource class attributes
|
|
878
|
-
if inspect.isclass(t) and issubclass(t, Base):
|
|
904
|
+
if inspect.isclass(t) and (issubclass(t, Base) or issubclass(t, Dataset)):
|
|
879
905
|
_bind_class_resources(t, dw)
|
|
880
906
|
return t
|
|
881
907
|
return dw
|
|
882
908
|
|
|
883
909
|
|
|
910
|
+
class Dataset(ABC):
|
|
911
|
+
"""Base class for simplified dataset definitions.
|
|
912
|
+
|
|
913
|
+
Inherit from this class and use the ``@dataset`` decorator.
|
|
914
|
+
Resources are defined as class attributes and accessed via ``self``.
|
|
915
|
+
|
|
916
|
+
Example::
|
|
917
|
+
|
|
918
|
+
@dataset(url="http://yann.lecun.com/exdb/mnist/")
|
|
919
|
+
class MNIST(Dataset):
|
|
920
|
+
\"\"\"The MNIST database of handwritten digits.\"\"\"
|
|
921
|
+
|
|
922
|
+
TRAIN_IMAGES = FileDownloader("train.idx", "http://...")
|
|
923
|
+
TEST_IMAGES = FileDownloader("test.idx", "http://...")
|
|
924
|
+
|
|
925
|
+
def config(self) -> ImageClassification:
|
|
926
|
+
return ImageClassification.C(
|
|
927
|
+
train=IDX(path=self.TRAIN_IMAGES.path),
|
|
928
|
+
test=IDX(path=self.TEST_IMAGES.path),
|
|
929
|
+
)
|
|
930
|
+
"""
|
|
931
|
+
|
|
932
|
+
@abstractmethod
|
|
933
|
+
def config(self) -> "Base":
|
|
934
|
+
"""Create and return the dataset configuration.
|
|
935
|
+
|
|
936
|
+
Override this method to construct and return the data object.
|
|
937
|
+
Resources are accessible via ``self.RESOURCE_NAME.path`` or
|
|
938
|
+
``self.RESOURCE_NAME.prepare()``.
|
|
939
|
+
|
|
940
|
+
Returns:
|
|
941
|
+
A Config instance (typically created via ``SomeType.C(...)``).
|
|
942
|
+
"""
|
|
943
|
+
...
|
|
944
|
+
|
|
945
|
+
|
|
884
946
|
class metadataset(AbstractDataset):
|
|
885
947
|
"""Annotation for object/functions which are abstract dataset definitions
|
|
886
948
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|