datamaestro 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datamaestro/__init__.py CHANGED
@@ -2,10 +2,14 @@
  from .context import (
      Context,
      Repository,
+     BaseRepository,
      get_dataset,
      prepare_dataset,
  )

- from pkg_resources import get_distribution, DistributionNotFound
+ from .datasets.yaml_repository import YAMLRepository

+ from pkg_resources import get_distribution, DistributionNotFound
+ from .definitions import dataset, metadata
+ from .data import Base

  from .version import version, version_tuple
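Taken together, these imports surface the 1.3.0 additions at the package root. A short sketch of what downstream code can now import directly:

    # re-exported at the top level as of 1.3.0
    from datamaestro import (
        BaseRepository,   # abstract base class for repositories
        YAMLRepository,   # repository described by a datamaestro.yaml file
        dataset,          # the @dataset definition decorator
        metadata,         # tag/task annotation (still a no-op in this release)
        Base,             # root class of all data types
    )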
datamaestro/context.py CHANGED
@@ -1,21 +1,22 @@
  from pathlib import Path
- from experimaestro.compat import cached_property
+ from typing import Iterable, Iterator, Dict, Union
  import importlib
  import os
  import hashlib
  import logging
  import inspect
  import json
- from experimaestro.mkdocs.metaloader import Module
+ from abc import ABC, abstractmethod
+ from experimaestro import Config
  import pkg_resources
- from typing import Iterable, Iterator, List, Dict
+ from experimaestro.compat import cached_property
+ from experimaestro.mkdocs.metaloader import Module
  from .utils import CachedFile, downloadURL
  from .settings import UserSettings, Settings
-
  from typing import TYPE_CHECKING

  if TYPE_CHECKING:
-     from datamaestro.definitions import AbstractDataset
+     from datamaestro.definitions import AbstractDataset, DatasetWrapper


  class Compression:
@@ -87,6 +88,11 @@ class Context:

          return ContextManager()

+     @property
+     def storepath(self):
+         """Replaces the data path"""
+         return self._path.joinpath("store")
+
      @property
      def datapath(self):
          return self._path.joinpath("data")
@@ -98,7 +104,9 @@ class Context:
      @cached_property
      def repositorymap(self) -> Dict[str, "Repository"]:
          return {
-             repository.basemodule(): repository for repository in self.repositories()
+             repository.basemodule(): repository
+             for repository in self.repositories()
+             if repository.basemodule() is not None
          }

      def repositories(self) -> Iterable["Repository"]:
@@ -286,10 +294,53 @@ class Datasets(Iterable["AbstractDataset"]):
          yield value.__dataset__


- class Repository:
-     """A repository regroup a set of datasets and their corresponding specific
+ class BaseRepository(ABC):
+     """A repository groups a set of datasets and their corresponding specific
      handlers (downloading, filtering, etc.)"""

+     def __init__(self, context: Context):
+         self.context = context
+         p = inspect.getabsfile(self.__class__)
+         self.basedir = Path(p).parent
+
+     @abstractmethod
+     def __iter__(self) -> Iterator["AbstractDataset"]:
+         ...
+
+     def search(self, name: str):
+         """Search for a dataset in the definitions"""
+         for dataset in self:
+             if name in dataset.aliases:
+                 return dataset
+
+     @classmethod
+     def instance(cls, context=None):
+         try:
+             return cls.__getattribute__(cls, "INSTANCE")
+         except AttributeError:
+             return cls(context if context else Context.instance())
+
+     @classmethod
+     def basemodule(cls):
+         return cls.__module__
+
+     @property
+     def generatedpath(self):
+         return self.basedir / "generated"
+
+     @property
+     def datapath(self):
+         return self.context.datapath.joinpath(self.id)
+
+     @property
+     def extrapath(self):
+         """Path to the directory containing extra configuration files"""
+         return self.basedir / "data"
+
+
+ class Repository(BaseRepository):
+     """(deprecated) Repository where datasets are located in __module__.config"""
+
      def __init__(self, context: Context):
          """Initialize a new repository

@@ -297,26 +348,14 @@ class Repository:
          :param basedir: The base directory of the repository
              (by default, the same as the repository class)
          """
+         super().__init__(context)
          self.context = context
-         p = inspect.getabsfile(self.__class__)
-         self.basedir = Path(p).parent
          self.configdir = self.basedir.joinpath("config")
          self.id = self.__class__.NAMESPACE
          self.name = self.id
          self.module = self.__class__.__module__
          self.__class__.INSTANCE = self

-     @classmethod
-     def basemodule(cls):
-         return cls.__module__
-
-     @classmethod
-     def instance(cls, context=None):
-         try:
-             return cls.__getattribute__(cls, "INSTANCE")
-         except AttributeError:
-             return cls(context if context else Context.instance())
-
      @classmethod
      def version(cls):
          from pkg_resources import get_distribution, DistributionNotFound
@@ -336,36 +375,8 @@ class Repository:
          assert isinstance(other, Repository)
          return self.basedir == other.basedir

-     def search(self, name: str):
-         """Search for a dataset in the definitions"""
-         logging.debug("Searching for %s in %s", name, self.configdir)
-
-         candidates: List[str] = []
-         components = name.split(".")
-         path = self.configdir
-         for i, c in enumerate(components):
-             path = path / c
-
-             if (path / "__init__.py").is_file():
-                 candidates.append(".".join(components[: i + 1]))
-
-             if path.with_suffix(".py").is_file():
-                 candidates.append(".".join(components[: i + 1]))
-
-             if not path.is_dir():
-                 break
-
-         # Get the dataset
-         for candidate in candidates[::-1]:
-             logging.debug("Searching in module %s.config.%s", self.module, candidate)
-             module = importlib.import_module("%s.config.%s" % (self.module, candidate))
-             for value in Datasets(module):
-                 if name in value.aliases:
-                     return value
-
-         return None
-
-     def datasets(self, candidate):
+     def datasets(self, candidate: str):
+         """Returns the dataset candidates from a module"""
          try:
              module = importlib.import_module("%s.config.%s" % (self.module, candidate))
          except ModuleNotFoundError:
@@ -409,19 +420,6 @@
          for dataset in datasets:
              yield dataset

-     @property
-     def generatedpath(self):
-         return self.basedir.joinpath("generated")
-
-     @property
-     def datapath(self):
-         return self.context.datapath.joinpath(self.id)
-
-     @property
-     def extrapath(self):
-         """Path to the directory containing extra configuration files"""
-         return self.basedir.joinpath("data")
-

  def find_dataset(dataset_id: str):
      """Find a dataset given its id"""
@@ -430,11 +428,17 @@ def find_dataset(dataset_id: str):
      return AbstractDataset.find(dataset_id)


- def prepare_dataset(dataset_id: str):
+ def prepare_dataset(dataset_id: Union[str, "DatasetWrapper", Config]):
      """Find a dataset given its id and download the resources"""
-     from .definitions import AbstractDataset
+     from .definitions import AbstractDataset, DatasetWrapper
+
+     if isinstance(dataset_id, DatasetWrapper):
+         ds = dataset_id
+     elif isinstance(dataset_id, Config):
+         ds = dataset_id.__datamaestro_dataset__
+     else:
+         ds = AbstractDataset.find(dataset_id)

-     ds = AbstractDataset.find(dataset_id)
      return ds.prepare(download=True)

datamaestro/data/__init__.py CHANGED
@@ -1,22 +1,18 @@
+ from abc import abstractmethod
  import logging
  from pathlib import Path
  from typing import Any, Dict
- from datamaestro.definitions import AbstractDataset, argument, Param
- from experimaestro import Config
- from experimaestro import documentation  # noqa: F401
+ from experimaestro import Config, Param, Meta
+ from datamaestro.definitions import AbstractDataset


  class Base(Config):
      """Base object for all data types"""

      id: Param[str]
-     """The unique dataset ID"""
+     """The unique (sub-)dataset ID"""

-     __datamaestro_dataset__: AbstractDataset
-
-     def download(self):
-         """Download the dataset"""
-         self.__datamaestro_dataset__.download()
+     __datamaestro_dataset__: "AbstractDataset"

      def dataset_information(self) -> Dict[str, Any]:
          """Returns document meta-informations"""
@@ -26,6 +22,16 @@ class Base(Config):
              "description": self.__datamaestro_dataset__.description,
          }

+     def download(self):
+         """Download the dataset"""
+         self.__datamaestro_dataset__.download()
+
+     @abstractmethod
+     def prepare(self, *args, **kwargs):
+         """Prepare the dataset"""
+         self.__datamaestro_dataset__.prepare()
+         return self
+

  class Generic(Base):
      """Generic dataset
@@ -44,16 +50,17 @@ class Generic(Base):
  class File(Base):
      """A data file"""

-     path: Param[Path]
+     path: Meta[Path]
      """The path of the file"""

      def open(self, mode):
          return self.path.open(mode)


- @argument("path", type=Path)
  class Folder(Base):
      """A data folder"""

+     path: Meta[Path]
+
      def open(self, mode):
          return self.path.open(mode)
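A recurring change in this release: filesystem paths move from `Param` to `Meta`. In experimaestro, `Meta` values are not part of a configuration's identity, so relocating data on disk no longer changes a dataset's identifier. A minimal sketch of a data type in the new style (the class and `checksum` field are illustrative):

    from pathlib import Path
    from experimaestro import Meta, Param
    from datamaestro.data import Base

    class Archive(Base):       # hypothetical data type
        checksum: Param[str]   # participates in the configuration identity
        path: Meta[Path]       # metadata only, excluded from the identity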
datamaestro/data/csv.py CHANGED
@@ -1,7 +1,8 @@
+ from typing import Optional, Tuple, List, Any
  from csv import reader as csv_reader
- from . import File, argument, documentation
- from datamaestro.definitions import Meta
- from typing import Tuple, List, Any
+ from experimaestro import Param, Meta
+ from experimaestro import documentation
+ from . import File


  class Generic(File):
@@ -26,12 +27,13 @@ class Generic(File):
              return row


- @argument("names_row", type=int, default=-1)
- @argument("size_row", type=int, default=-1)
- @argument("target", type=str, default=None)
  class Matrix(Generic):
      """A numerical dataset"""

+     names_row: Param[int] = -1
+     size_row: Param[int] = -1
+     target: Param[Optional[str]] = None
+
      @documentation
      def data(self) -> Tuple[List[str], Any]:
          """Returns the list of fields and the numeric data
datamaestro/data/ml.py CHANGED
@@ -1,7 +1,7 @@
  """Machine learning generic data formats"""
- from typing import Generic, TypeVar, Optional
  from pathlib import Path
- from experimaestro import Param, Meta, argument
+ from typing import Generic, TypeVar, Optional
+ from experimaestro import Param, Meta
  from . import Base

  Train = TypeVar("Train", bound=Base)
@@ -20,8 +20,8 @@ class Supervised(Base, Generic[Train, Validation, Test]):
      """The training optional"""


- @argument("classes")
  class FolderBased(Base):
      """Classification dataset where folders give the basis"""

+     classes: Param[list[str]]
      path: Meta[Path]
datamaestro/data/tensor.py CHANGED
@@ -1,44 +1,50 @@
- from pathlib import Path
+ from abc import ABC, abstractmethod
  from struct import Struct
- from . import File
+ from typing import TYPE_CHECKING
+ from . import File, Base

+ if TYPE_CHECKING:
+     import numpy as np

- class IDX(File):
+
+ class Tensor(Base, ABC):
+     @abstractmethod
+     def data(self) -> "np.ndarray":
+         """Returns the tensor in numpy format"""
+         pass
+
+
+ class IDX(Tensor, File):
      """IDX File format

-     The IDX file format is a simple format for vectors and multidimensional matrices of various numerical types.
+     The IDX file format is a simple format for vectors and multidimensional
+     matrices of various numerical types.

      The basic format is:

-         magic number
-         size in dimension 0
-         size in dimension 1
-         size in dimension 2
-         .....
-         size in dimension N
-         data
+     magic number size in dimension 0 size in dimension 1 size in dimension 2
+     ..... size in dimension N data

      The magic number is an integer (MSB first). The first 2 bytes are always 0.

-     The third byte codes the type of the data:
-         0x08: unsigned byte
-         0x09: signed byte
-         0x0B: short (2 bytes)
-         0x0C: int (4 bytes)
-         0x0D: float (4 bytes)
-         0x0E: double (8 bytes)
+     The third byte codes the type of the data: 0x08: unsigned byte 0x09: signed
+     byte 0x0B: short (2 bytes) 0x0C: int (4 bytes) 0x0D: float (4 bytes) 0x0E:
+     double (8 bytes)

-     The 4-th byte codes the number of dimensions of the vector/matrix: 1 for vectors, 2 for matrices....
+     The 4-th byte codes the number of dimensions of the vector/matrix: 1 for
+     vectors, 2 for matrices....

-     The sizes in each dimension are 4-byte integers (MSB first, high endian, like in most non-Intel processors).
+     The sizes in each dimension are 4-byte integers (MSB first, high endian,
+     like in most non-Intel processors).

-     The data is stored like in a C array, i.e. the index in the last dimension changes the fastest.
+     The data is stored like in a C array, i.e. the index in the last dimension
+     changes the fastest.
      """

      MAGIC_NUMBER = Struct(">HBB")
      DIM = Struct(">I")

-     def data(self):
+     def data(self) -> "np.ndarray":
          """Returns the tensor"""
          import numpy as np

@@ -59,7 +65,8 @@ class IDX(File):
          shape = [IDX.DIM.unpack_from(fp.read(IDX.DIM.size))[0] for i in range(size)]

          size = np.prod(shape)
-         # Could use np.fromfile... if it were not broken - see https://github.com/numpy/numpy/issues/7989
+         # Could use np.fromfile... if it were not broken
+         # see https://github.com/numpy/numpy/issues/7989
          data = np.frombuffer(fp.read(), dtype=dtype, count=size)
          data = data.reshape(shape, order="C")
          return data
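As a quick sanity check of the header layout described in the docstring: `MAGIC_NUMBER = Struct(">HBB")` unpacks the two leading zero bytes, the type code, and the dimension count. With a hand-built header (not a real file):

    from struct import Struct

    MAGIC = Struct(">HBB")  # 2 zero bytes, data-type code, number of dimensions
    zeros, type_code, ndim = MAGIC.unpack(b"\x00\x00\x08\x03")
    assert (zeros, type_code, ndim) == (0, 0x08, 3)  # unsigned bytes, 3-D tensor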
datamaestro/datasets/yaml_repository.py ADDED
@@ -0,0 +1,103 @@
+ import regex
+ from typing import Iterator, Optional
+ from functools import cached_property
+ from attrs import field
+ import importlib
+ from omegaconf import OmegaConf
+ from functools import partial
+ from attrs import define
+ from datamaestro import BaseRepository
+ from datamaestro.definitions import AbstractDataset, DatasetWrapper
+ from datamaestro.data import Base
+
+
+ re_spec = regex.compile(r"""^(\w\.)+:(\w+)""")
+
+
+ @define
+ class RepositoryDataset:
+     ids: list[str]
+     """ID(s) of this dataset"""
+
+     entry_point: str = field(validator=re_spec.match)
+     """The entry point"""
+
+     title: str
+     """The full name of the dataset"""
+
+     description: str
+     """Description of the dataset"""
+
+     url: Optional[str]
+     """The URL"""
+
+     groups: Optional[list[str]]
+     """Groups to which this repository belongs"""
+
+
+ @define
+ class RepositoryAuthors:
+     name: str
+     email: str
+
+
+ @define
+ class RepositoryGroup:
+     name: str
+     tasks: list[str]
+     tags: list[str]
+
+
+ @define
+ class RepositoryConfiguration:
+     namespace: str
+     authors: list[RepositoryAuthors]
+     description: str
+     groups: dict[str, RepositoryGroup]
+     datasets: list[RepositoryDataset]
+
+
+ class YAMLDataset(AbstractDataset):
+     def __init__(self, repository: "YAMLRepository", information: RepositoryDataset):
+         super().__init__(repository)
+         self.information = information
+         self.id = self.information.ids[0]
+         self.aliases = set(self.information.ids)
+
+     @cached_property
+     def wrapper(self) -> DatasetWrapper:
+         module, func_name = self.information.entry_point.split(":")
+         wrapper = getattr(importlib.import_module(module), func_name)
+         return wrapper
+
+     def _prepare(self) -> "Base":
+         return self.wrapper()
+
+     def download(self, **kwargs):
+         return self.wrapper.download(**kwargs)
+
+
+ class YAMLRepository(BaseRepository):
+     """YAML-based repository"""
+
+     @property
+     def id(self):
+         return self.configuration.namespace
+
+     @property
+     def name(self):
+         return self.configuration.namespace
+
+     @cached_property
+     def configuration(self):
+         schema = OmegaConf.structured(RepositoryConfiguration)
+         with importlib.resources.path(
+             self.__class__.__module__, "datamaestro.yaml"
+         ) as fp:
+             conf = OmegaConf.load(fp)
+
+         conf: RepositoryConfiguration = OmegaConf.merge(schema, conf)
+         return conf
+
+     def __iter__(self) -> Iterator["AbstractDataset"]:
+         return map(partial(YAMLDataset, self), self.configuration.datasets)
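For context, a sketch of the `datamaestro.yaml` file that `YAMLRepository.configuration` loads from the repository module and merges into the `RepositoryConfiguration` schema. All values below are hypothetical:

    # hypothetical datamaestro.yaml, shown as a Python string for reference
    EXAMPLE_DATAMAESTRO_YAML = """
    namespace: example
    description: An example dataset repository
    authors:
      - name: Jane Doe
        email: jane@example.org
    groups: {}
    datasets:
      - ids: [example.digits]
        entry_point: example.datasets:digits  # module:function of a @dataset definition
        title: Digits
        description: A small digits dataset
        url: https://example.org/digits
        groups: null
    """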
datamaestro/definitions.py CHANGED
@@ -6,6 +6,8 @@ import logging
  import inspect
  from pathlib import Path
  from itertools import chain
+ from abc import ABC, abstractmethod
+ from contextlib import contextmanager
  import traceback
  from typing import (
      Dict,
@@ -16,6 +18,9 @@ from typing import (
      TypeVar,
      Callable,
      TYPE_CHECKING,
+     Union,
+     ClassVar,
+     _GenericAlias,
  )
  from experimaestro import (  # noqa: F401 (re-exports)
      argument,
@@ -27,16 +32,16 @@ from experimaestro import (  # noqa: F401 (re-exports)
  )
  from typing import Type as TypingType  # noqa: F401 (re-exports)
  from experimaestro.core.types import Type  # noqa: F401 (re-exports)
- from .context import Repository, Context, DatafolderPath  # noqa: F401 (re-exports)

  if TYPE_CHECKING:
+     from .data import Base, Dataset
+     from .context import Repository, Context, DatafolderPath  # noqa: F401 (re-exports)
      from datamaestro.download import Download
-     from .data import Base

  # --- Objects holding information into classes/function


- class AbstractData:
+ class AbstractData(ABC):
      """Data definition groups common fields between a dataset and a data piece,
      such as tags and tasks"""

@@ -77,8 +82,10 @@ class DataDefinition(AbstractData):
          return self._description

      @staticmethod
-     def repository_relpath(t: type) -> Tuple[Repository, List[str]]:
+     def repository_relpath(t: type) -> Tuple["Repository", List[str]]:
          """Find the repository of the current data or dataset definition"""
+         from .context import Context  # noqa: F811
+
          repositorymap = Context.instance().repositorymap

          fullname = f"{t.__module__}.{t.__name__}"
@@ -97,9 +104,6 @@
          if components[0] == "datamaestro":
              longest_ix = 0

-         if repository is None:
-             raise Exception(f"Could not find the repository for {fullname}")
-
          return repository, components[(longest_ix + 1) :]

      def ancestors(self):
@@ -163,18 +167,26 @@ class AbstractDataset(AbstractData):

      @property
      def context(self):
+         if self.repository is None:
+             from datamaestro.context import Context  # noqa: F811
+
+             return Context.instance()
          return self.repository.context

      def prepare(self, download=False) -> "Base":
-         ds = self._prepare(download)
+         ds = self._prepare()
          ds.__datamaestro_dataset__ = self
+
+         if download:
+             ds.download()
          return ds

      def register_hook(self, hookname: str, hook: Callable):
          self.hooks[hookname].append(hook)

-     def _prepare(self, download=False) -> "Base":
-         raise NotImplementedError(f"prepare() in {self.__class__}")
+     @abstractmethod
+     def _prepare(self) -> "Base":
+         ...

      def format(self, encoder: str) -> str:
          s = self.prepare()
@@ -194,7 +206,10 @@
          from datamaestro.data import Base

          if isinstance(data, Base):
-             data.id = f"{id}@{self.repository.name}"
+             if self.repository is None:
+                 data.id = id
+             else:
+                 data.id = f"{id}@{self.repository.name}"
              for key, value in data.__xpm__.values.items():
                  if isinstance(value, Config):
                      self.setDataIDs(value, f"{id}.{key}")
@@ -203,6 +218,7 @@
          """Download all the necessary resources"""
          success = True
          logging.info("Materializing %d resources", len(self.ordered_resources))
+         self.prepare()
          for resource in self.ordered_resources:
              try:
                  resource.download(force)
@@ -216,6 +232,8 @@
      @staticmethod
      def find(name: str) -> "DataDefinition":
          """Find a dataset given its name"""
+         from datamaestro.context import Context  # noqa: F811
+
          logging.debug("Searching dataset %s", name)
          for repository in Context.instance().repositories():
              logging.debug("Searching dataset %s in %s", name, repository)
@@ -226,7 +244,7 @@


  class FutureAttr:
-     """Allows to access a dataset subproperty"""
+     """Allows to access a dataset sub-property"""

      def __init__(self, dataset, keys):
          self.dataset = dataset
@@ -256,10 +274,14 @@ class DatasetWrapper(AbstractDataset):
      annotations (otherwise, derive from `AbstractDataset`).
      """

+     BUILDING: ClassVar[list["DatasetWrapper"]] = []
+     """Currently built dataset"""
+
      def __init__(self, annotation, t: type):
+         self.config = None
+         self.repository: Optional[Repository] = None
          self.t = t
          self.base = annotation.base
-         self.config = None
          assert self.base is not None, f"Could not set the Config type for {t}"

          repository, components = DataDefinition.repository_relpath(t)
@@ -271,19 +293,22 @@ class DatasetWrapper(AbstractDataset):

          # Builds the ID:
          # Removes module_name.config prefix
-         assert (
-             components[0] == "config"
-         ), f"A @dataset object should be in the .config module (not {t.__module__})"
+         if annotation.id is None or annotation.id == "":
+             # Computes an ID
+             assert (
+                 # id is empty string = use the module id
+                 components[0]
+                 == "config"
+             ), (
+                 "A @dataset without `id` should be in the "
+                 f".config module (not {t.__module__})"
+             )
+             path = ".".join(components[1:-1])

-         path = ".".join(components[1:-1])
-         if annotation.id == "":
-             # id is empty string = use the module id
              self.id = path
          else:
-             self.id = "%s.%s" % (
-                 path,
-                 annotation.id or t.__name__.lower().replace("_", "."),
-             )
+             # Use the provided ID
+             self.id = annotation.id

          self.aliases.add(self.id)

@@ -327,9 +352,6 @@ class DatasetWrapper(AbstractDataset):
      def configtype(self):
          return self.base

-     def __call__(self, *args, **kwargs):
-         self.t(*args, **kwargs)
-
      def __getattr__(self, key):
          """Returns a pointer to a potential attribute"""
          return FutureAttr(self, [key])
@@ -339,40 +361,59 @@ class DatasetWrapper(AbstractDataset):
          self._prepare()
          return super().download(force=force)

-     def _prepare(self, download=False) -> "Base":
+     @contextmanager
+     def building(self):
+         DatasetWrapper.BUILDING.append(self)
+         yield self
+         DatasetWrapper.BUILDING.pop()
+
+     def _prepare(self) -> "Base":
          if self.config is not None:
              return self.config

+         # Direct creation of the dataset
          if self.base is self.t:
              self.config = self.base.__create_dataset__(self)

-         if download:
-             for hook in self.hooks["pre-download"]:
-                 hook(self)
-             if not self.download(False):
-                 raise Exception("Could not load necessary resources")
+         # Construct the object
+         resources = {key: value.prepare() for key, value in self.resources.items()}
+
+         with self.building():
+             result = self.t(**resources)
+
+         # Download resources
          logging.debug("Building with data type %s and dataset %s", self.base, self.t)
          for hook in self.hooks["pre-use"]:
              hook(self)

-         # Construct the object
-         if self.config is None:
-             resources = {key: value.prepare() for key, value in self.resources.items()}
-             dict = self.t(**resources)
-             if dict is None:
-                 name = self.t.__name__
-                 filename = inspect.getfile(self.t)
-                 raise Exception(
-                     f"The dataset method {name} defined in "
-                     f"{filename} returned a null object"
-                 )
-             self.config = self.base(**dict)
+         if result is None:
+             name = self.t.__name__
+             filename = inspect.getfile(self.t)
+             raise Exception(
+                 f"The dataset method {name} defined in "
+                 f"{filename} returned a null object"
+             )
+
+         if isinstance(result, dict):
+             self.config = self.base(**result)
+         elif isinstance(result, self.base):
+             self.config = result
+         else:
+             raise RuntimeError(
+                 f"The dataset method {name} defined in "
+                 f"{filename} returned an object of type {type(dict)}"
+             )
+
+         # Setup ourself
+         self.config.__datamaestro_dataset__ = self

          # Set the ids
          self.setDataIDs(self.config, self.id)

          return self.config

+     __call__ = _prepare
+
      @property
      def _path(self) -> Path:
          """Returns a unique relative path for this dataset"""
@@ -384,7 +425,20 @@ class DatasetWrapper(AbstractDataset):
      @property
      def datapath(self):
          """Returns the destination path for downloads"""
-         return self.repository.datapath / self._path
+         from datamaestro import Context  # noqa: F811
+
+         path = Context.instance().storepath / self._path
+
+         if (self.repository is not None) and (not path.exists()):
+             old_path: Path = self.repository.datapath / self._path
+             if old_path.exists():
+                 logging.info(
+                     "Moving from old path [%s] to new path [%s]", old_path, path
+                 )
+                 path.parent.mkdir(exist_ok=True, parents=True)
+                 old_path.rename(path)
+
+         return path

      def hasfiles(self) -> bool:
          """Returns whether this dataset has files or only includes references"""
@@ -426,10 +480,16 @@ class DatasetAnnotation:
      """Base class for all annotations"""

      def __call__(self, dataset: AbstractDataset):
-         assert isinstance(
-             dataset, AbstractDataset
-         ), f"Only datasets can be annotated with {self}, but {dataset} is not a dataset"
-         self.annotate(dataset)
+         if isinstance(dataset, AbstractDataset):
+             self.annotate(dataset)
+         elif issubclass(dataset, Dataset):
+             self.annotate(dataset.__datamaestro__)
+         else:
+             raise RuntimeError(
+                 f"Only datasets can be annotated with {self}, "
+                 f"but {dataset} is not a dataset"
+             )
+
          return dataset

      def annotate(self, dataset: AbstractDataset):
@@ -477,9 +537,27 @@ datatags = DataTagging(lambda d: d.tags)
  datatasks = DataTagging(lambda d: d.tasks)


+ class metadata:
+     def __init__(
+         self, tags: Union[str, List[str]] = None, tasks: Union[str, List[str]] = None
+     ):
+         pass
+
+     def __call__(self, object: type):
+         # FIXME: todo
+         return object
+
+
  class dataset:
      def __init__(
-         self, base=None, *, timestamp=None, id=None, url=None, size=None, doi=None
+         self,
+         base=None,
+         *,
+         timestamp=None,
+         id=None,
+         url=None,
+         size=None,
+         doi=None,
      ):
          """Creates a new (meta)dataset

@@ -523,9 +601,12 @@ class dataset:
          if inspect.isclass(t) and issubclass(t, Base):
              self.base = t
          else:
-             # Get type from return annotation
              try:
-                 self.base = t.__annotations__["return"]
+                 # Get type from return annotation
+                 return_type = t.__annotations__["return"]
+                 if isinstance(return_type, _GenericAlias):
+                     return_type = return_type.__origin__
+                 self.base = return_type
              except KeyError:
                  logging.warning("No return annotation in %s", t)
                  raise
@@ -533,7 +614,6 @@ class dataset:
              raise AssertionError("@data should only be called once")
          except AttributeError:
              pass
-
          dw = DatasetWrapper(self, t)
          t.__dataset__ = dw
          if inspect.isclass(t) and issubclass(t, Base):
@@ -560,3 +640,5 @@ class metadataset(AbstractDataset):
              pass
          t.__datamaestro__ = self
          return t
+
+ _prepare = None
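Two consequences of the `DatasetWrapper` rework are worth spelling out: a `@dataset` function may now return either a dict of constructor arguments or an instance of its base type, and an explicit `id` lifts the requirement that the definition live in a `.config` module. A hedged sketch (all names hypothetical):

    from pathlib import Path
    from datamaestro import dataset
    from datamaestro.data import File

    @dataset(id="example.corpus", url="https://example.org/corpus")
    def corpus() -> File:  # base type inferred from the return annotation
        # either a dict of arguments for the base type, or a File instance
        return {"path": Path("corpus.txt")}

    # corpus() now builds (and caches) the configuration directly, since
    # DatasetWrapper.__call__ is an alias of _prepare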
datamaestro/download/__init__.py CHANGED
@@ -43,6 +43,13 @@ class Resource(DatasetAnnotation, ABC):
          dataset.ordered_resources.append(self)
          self.definition = dataset

+     def contextualize(self):
+         """When using an annotation inline, uses the current dataset wrapper object"""
+         from datamaestro.definitions import DatasetWrapper
+
+         wrapper = DatasetWrapper.BUILDING[-1]
+         self.annotate(wrapper)
+
      @property
      def context(self):
          return self.definition.context
@@ -77,7 +84,7 @@ class Resource(DatasetAnnotation, ABC):
  Download = Resource


- class reference(Download):
+ class reference(Resource):
      def __init__(self, varname=None, reference=None):
          """References another dataset

datamaestro/download/custom.py ADDED
@@ -0,0 +1,29 @@
+ from typing import Protocol
+ from pathlib import Path
+ from datamaestro import Context
+ from datamaestro.definitions import DatasetWrapper
+ from datamaestro.download import Resource
+
+
+ class Downloader(Protocol):
+     def __call__(self, context: Context, root: Path, *, force=False):
+         pass
+
+
+ class CustomResource(Resource):
+     def __init__(self, ds_wrapper: DatasetWrapper, downloader: Downloader):
+         self.ds_wrapper = ds_wrapper
+         self.downloader = downloader
+
+     def prepare(self):
+         pass
+
+     def download(self, force=False):
+         self.downloader(self.context, self.ds_wrapper.datapath, force=force)
+
+
+ def custom_download(downloader: Downloader) -> Path:
+     ds_wrapper = DatasetWrapper.BUILDING[-1]
+     ds_wrapper.ordered_resources.append(CustomResource(ds_wrapper, downloader))
+
+     return ds_wrapper.datapath
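`custom_download` leans on `DatasetWrapper.BUILDING`, so it only works inline, while a `@dataset` function body is being evaluated. A hedged usage sketch (the downloader logic is hypothetical):

    from pathlib import Path
    from datamaestro import Context, dataset
    from datamaestro.data import Folder
    from datamaestro.download.custom import custom_download

    def fetch(context: Context, root: Path, *, force=False):
        # hypothetical downloader: populate `root` however is appropriate
        root.mkdir(parents=True, exist_ok=True)

    @dataset(id="example.scraped")
    def scraped() -> Folder:
        return {"path": custom_download(fetch)}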
datamaestro/download/single.py CHANGED
@@ -9,7 +9,7 @@ import os
  import urllib3
  from pathlib import Path
  import re
- from datamaestro.utils import copyfileobjs
+ from datamaestro.utils import copyfileobjs, FileChecker
  from datamaestro.stream import Transform
  from datamaestro.download import Download

@@ -96,6 +96,20 @@ class filedownloader(SingleDownload):
          logging.info("Created file %s" % destination)


+ def file_from_url(
+     filename: str,
+     url: str,
+     *,
+     size: Optional[int] = None,
+     transforms: Optional[Transform] = None,
+     checker: Optional[FileChecker] = None,
+ ) -> Path:
+     """Defines a file that should be downloaded from"""
+     downloader = filedownloader(filename, url, size, transforms, checker)
+     downloader.contextualize()
+     return downloader.path
+
+
  class concatdownload(SingleDownload):
      """Concatenate all files in an archive"""

datamaestro/search.py CHANGED
@@ -40,7 +40,7 @@ class AndCondition(Condition):
          return True

      def __repr__(self):
-         return " AND ".join(self.conditions)
+         return " AND ".join([repr(x) for x in self.conditions])


  class OrCondition(Condition):
datamaestro/test/test_annotations.py CHANGED
@@ -5,6 +5,7 @@ from datamaestro.definitions import AbstractDataset
  def test_useragreements(context):
      # Fake dataset
      class t(AbstractDataset):
-         pass
+         def _prepare(self):
+             pass

      useragreement("test")(t(None))
datamaestro/test/test_download_handlers.py CHANGED
@@ -12,6 +12,9 @@ class Dataset(AbstractDataset):
          super().__init__(repository)
          self.datapath = Path(repository.context._path)

+     def _prepare(self):
+         pass
+

  def test_filedownloader(context):
      repository = MyRepository(context)
datamaestro/utils.py CHANGED
@@ -42,6 +42,8 @@ def copyfileobjs(fsrc, fdsts, length=0):


  class FileChecker:
+     """Checks a file"""
+
      def check(self, path: Path):
          """Check the given file

datamaestro/version.py CHANGED
@@ -1,8 +1,13 @@
- # file generated by setuptools_scm
+ # file generated by setuptools-scm
  # don't change, don't track in version control
+
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+
  TYPE_CHECKING = False
  if TYPE_CHECKING:
-     from typing import Tuple, Union
+     from typing import Tuple
+     from typing import Union
+
      VERSION_TUPLE = Tuple[Union[int, str], ...]
  else:
      VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '1.2.1'
- __version_tuple__ = version_tuple = (1, 2, 1)
+ __version__ = version = '1.3.0'
+ __version_tuple__ = version_tuple = (1, 3, 0)
datamaestro-1.3.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: datamaestro
- Version: 1.2.1
+ Version: 1.3.0
  Summary: "Dataset management command line and API"
  Home-page: https://github.com/experimaestro/datamaestro
  Author: Benjamin Piwowarski
@@ -25,17 +25,19 @@ Requires-Dist: click
  Requires-Dist: tqdm
  Requires-Dist: urllib3
  Requires-Dist: marshmallow
- Requires-Dist: cached-property
+ Requires-Dist: cached_property
  Requires-Dist: requests
  Requires-Dist: bitmath
- Requires-Dist: experimaestro >=1.5.0
+ Requires-Dist: experimaestro>=1.5.0
  Requires-Dist: mkdocs
  Requires-Dist: pymdown-extensions
  Requires-Dist: mkdocs-material
- Requires-Dist: docstring-parser
+ Requires-Dist: docstring_parser
  Requires-Dist: numpy
  Provides-Extra: test
- Requires-Dist: tox ; extra == 'test'
+ Requires-Dist: tox; extra == "test"
+ Dynamic: license-file
+ Dynamic: requires-dist

  [![PyPI version](https://badge.fury.io/py/datamaestro.svg)](https://badge.fury.io/py/datamaestro) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit) [![DOI](https://zenodo.org/badge/4573876.svg)](https://zenodo.org/badge/latestdoi/4573876)

datamaestro-1.3.0.dist-info/RECORD CHANGED
@@ -1,31 +1,34 @@
- datamaestro/__init__.py,sha256=9M5hA6FVngduJBcjInvJWQM8n0cqapXAFPzfRLHR74c,237
+ datamaestro/__init__.py,sha256=gnbxrPFzIuG4oR2Qrw9UYS0SNVsf4yCtqNvzSjstdak,376
  datamaestro/__main__.py,sha256=tJTf1sTWKRIatvBcHlWDIZRZodAZ2B2zkD01pD89MYk,9024
- datamaestro/context.py,sha256=8U5EYEdc9xcHnZFFk4PCZttxxGsmlzRVR8rLBy2zVBw,13605
- datamaestro/definitions.py,sha256=mBoLgrbO1eHVcqMPkb4lxadNdgSsy_w355nZofvBoF8,16732
+ datamaestro/context.py,sha256=S7sQ6RQVLjtoY5iyAikfyvfbqoaoDzcHt4-js8t6mMg,13653
+ datamaestro/definitions.py,sha256=HEnwB32Reb4ouLOjboEOe_j88keBZPQ0SU6OrO_ohLU,18764
  datamaestro/record.py,sha256=m3WGsPcZ1LouQXNJOBUK3QusAIRiuy6T_oqhq09-Ckg,5504
  datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
- datamaestro/search.py,sha256=PMceNp5hcp0dlzs4cLb6LJT7XHrdXo58oO7oTucawbE,2887
+ datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
  datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
  datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
- datamaestro/utils.py,sha256=Y3_aqeOHW8vuifwggGWJfgONyDG1FLX7ONAnX85jENI,6511
- datamaestro/version.py,sha256=2U0Gn26fYI3Vgj5hgkLM8I3wI6YEVdffJGllaVW-sSc,411
+ datamaestro/utils.py,sha256=9m-AVVww6InAZfGFiGy6XJzfExpYNqH1fhWQEezjafA,6536
+ datamaestro/version.py,sha256=qDtcPZdKzxLpd8vVl6fpIFIMkWt2HK_cO9gLDwaHEdk,511
  datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
  datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
  datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamaestro/commands/mainstyle.css,sha256=EAWq6hKWjLYZ-gUrGV-z3L8LtkubD7mLoYdSIC7kLOo,465
  datamaestro/commands/site.py,sha256=nnz4tOwKcgUmsLfPcQVo2SgFIC3OShYfJ8S2N6vuzAw,14173
- datamaestro/data/__init__.py,sha256=vOedQsnYtxI2yj-M2nm32eHpIu9S_WRzfA3futlHNs4,1412
- datamaestro/data/csv.py,sha256=-UXjEbKPvhhZ9_MdYnxUsD8Zsz2t4ZFbserFuHak8pw,2515
+ datamaestro/data/__init__.py,sha256=Z1qZnliJwS5sRaLznK5YBVJCjvAlPbmJjbRvvLv_UVI,1547
+ datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
  datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
- datamaestro/data/ml.py,sha256=guh1bxi7Dl3SajJdtBFrtPh6K8eNKiMkBKmBeKGuW5U,710
- datamaestro/data/tensor.py,sha256=OVzV1krIRslui8REdl7hPFu3AXlUyDxf5yUZlbNYsz8,2001
- datamaestro/download/__init__.py,sha256=Iqz7zEzeTsBWzE_6bpurhZVtzRjyXVUwCY6MEVjJpO0,2592
+ datamaestro/data/ml.py,sha256=7Rv4Tb9g17HDj8mOBJpIDjgolGQAd5Wrb0mHlnm-bPE,709
+ datamaestro/data/tensor.py,sha256=in36UQz4cdUEVmCS62pInu9RNekohRON667Z_JqNdhk,2254
+ datamaestro/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ datamaestro/datasets/yaml_repository.py,sha256=-UgpRwIALzmfubtb6kXVKjZ9IbiAsnslSth2I1XQ6EU,2539
+ datamaestro/download/__init__.py,sha256=XcRw9acAq1IwhLQZpj2HpMNEaMesA5BbllJpbRCkOwA,2846
  datamaestro/download/archive.py,sha256=G-2gzepknqT7Us3naMGAApGVGJMeHQIxM-tSpaa9ark,5608
+ datamaestro/download/custom.py,sha256=2-gFoOgQ8J93HjH9sc7u6wjVYm7DmSytP1ty2O6-d8k,839
  datamaestro/download/huggingface.py,sha256=LkzmZo2Z0yccqAfj7di7jDNGFrMKN9m8IM8SfexOomY,1125
  datamaestro/download/links.py,sha256=GFnq_AzI_uen7JBuGWD9qveeC9QFBWDrSnj7pOcwWwM,3352
  datamaestro/download/manual.py,sha256=-T2QWxKAiN3ZbSujjQUVeWDEDFonw9VnlzCfBIHcLao,190
  datamaestro/download/multiple.py,sha256=Mrr0ObHM5cE1CPSHE9PKIrox3qZVgxwRyxLzNXp0LqM,2159
- datamaestro/download/single.py,sha256=QSEviTP9lHLh3ZGyo_KoW3ro8UvWCGNPHeZiNj-9rLA,4134
+ datamaestro/download/single.py,sha256=bMDLldvODp2ZXyxXeKLT4qbL-v4igA6A7HVjIt2Cf8c,4526
  datamaestro/download/sync.py,sha256=Z_LsXj4kbZWIYKTVJZEhfdpYiv6wXOOIyw8LahmEcqs,836
  datamaestro/download/todo.py,sha256=y3YnmWC_i-u23ce-vreIwIXZcoO-uA0HXErgJPThnco,256
  datamaestro/download/wayback.py,sha256=B9X1P9jElvd_qnUs9aX0TAO-NrNyvuHLYDAcpNq354w,5430
@@ -36,12 +39,12 @@ datamaestro/templates/dataset.py,sha256=5065rTMAIl4gtzQ96GFiV1_46tY08miIx3WspTP8
  datamaestro/test/__init__.py,sha256=8-oxS68ufD45pv_HldE4S4rSWFF6L-UB_Cms-72DD2M,22
  datamaestro/test/checks.py,sha256=1eTkz4YJhAPOcnQSsz4vPnvzwwfrEnpn6H_s1ADISpo,1704
  datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,767
- datamaestro/test/test_annotations.py,sha256=kRPUmS_UAN6JSSVPUwV4OM_LEuEUHF1OcLSiYXjsKjw,246
- datamaestro/test/test_download_handlers.py,sha256=Qqm-fML1KVp6dPwAUcH6xzi_dpQIshvROzviSYCUzc0,603
+ datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
+ datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
  datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
- datamaestro-1.2.1.dist-info/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
- datamaestro-1.2.1.dist-info/METADATA,sha256=2_TL_ysMtfV2a84_0Uu3UQloCHCvetGZWo5tcjdhNCA,8999
- datamaestro-1.2.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- datamaestro-1.2.1.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
- datamaestro-1.2.1.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
- datamaestro-1.2.1.dist-info/RECORD,,
+ datamaestro-1.3.0.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
+ datamaestro-1.3.0.dist-info/METADATA,sha256=UT7JBZzAVGEKtEjUm0jjiHMPW7ZtHlgWljs_9O8s_04,9042
+ datamaestro-1.3.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ datamaestro-1.3.0.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
+ datamaestro-1.3.0.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
+ datamaestro-1.3.0.dist-info/RECORD,,
datamaestro-1.3.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.43.0)
+ Generator: setuptools (78.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
