PyPI - datamaestro - Versions diffs - 1.3.2__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

datamaestro-1.3.2-py3-none-any.whl → datamaestro-1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

datamaestro/__init__.py +0 -2
datamaestro/__main__.py +13 -9
datamaestro/context.py +0 -5
datamaestro/definitions.py +29 -29
datamaestro/download/__init__.py +3 -3
datamaestro/download/custom.py +5 -13
datamaestro/download/huggingface.py +1 -1
datamaestro/download/single.py +2 -16
datamaestro/version.py +2 -2
{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/METADATA +38 -40
{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/RECORD +15 -17
datamaestro/datasets/__init__.py +0 -0
datamaestro/datasets/yaml_repository.py +0 -103
{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/WHEEL +0 -0
{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/entry_points.txt +0 -0
{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/licenses/LICENSE +0 -0
{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/top_level.txt +0 -0

datamaestro/__init__.py CHANGED Viewed

@@ -7,8 +7,6 @@ from .context import (
     prepare_dataset,
 )
-from .datasets.yaml_repository import YAMLRepository
 from pkg_resources import get_distribution, DistributionNotFound
 from .definitions import dataset, metadata
 from .data import Base

datamaestro/__main__.py CHANGED Viewed

@@ -319,13 +319,17 @@ def search(config: Config, searchterms):
     logging.debug("Search: %s", condition)
     for dataset in config.context.datasets():
-        if condition.match(dataset):
-            cfg = dataset.configtype
-            print(
-                "[%s] %s (%s)"
-                % (
-                    dataset.repository.id,
-                    dataset.id,
-                    cfg.__name__ if cfg is not None else "?",
+        try:
+            if condition.match(dataset):
+                cfg = dataset.configtype
+                print(
+                    "[%s] %s (%s)"
+                    % (
+                        dataset.repository.id,
+                        dataset.id,
+                        cfg.__name__ if cfg is not None else "?",
+                    )
                 )
-            )
+        except Exception:
+            logging.error("Error while matching with dataset %s", dataset)
+            raise

datamaestro/context.py CHANGED Viewed

@@ -88,11 +88,6 @@ class Context:
         return ContextManager()
-    @property
-    def storepath(self):
-        """Replaces the data path"""
-        return self._path.joinpath("store")
     @property
     def datapath(self):
         return self._path.joinpath("data")

datamaestro/definitions.py CHANGED Viewed

@@ -7,7 +7,6 @@ import inspect
 from pathlib import Path
 from itertools import chain
 from abc import ABC, abstractmethod
-from contextlib import contextmanager
 import traceback
 from typing import (
     Dict,
@@ -19,7 +18,6 @@ from typing import (
     Callable,
     TYPE_CHECKING,
     Union,
-    ClassVar,
     _GenericAlias,
 )
 from experimaestro import (  # noqa: F401 (re-exports)
@@ -104,7 +102,7 @@ class DataDefinition(AbstractData):
             if components[0] == "datamaestro":
                 longest_ix = 0
-        return repository, components[(longest_ix + 1) :]
+        return repository, [s.lower() for s in components[(longest_ix + 1) :]]
     def ancestors(self):
         ancestors = []
@@ -217,8 +215,8 @@ class AbstractDataset(AbstractData):
     def download(self, force=False):
         """Download all the necessary resources"""
         success = True
-        logging.info("Materializing %d resources", len(self.ordered_resources))
         self.prepare()
+        logging.info("Materializing %d resources", len(self.ordered_resources))
         for resource in self.ordered_resources:
             try:
                 resource.download(force)
@@ -274,9 +272,6 @@ class DatasetWrapper(AbstractDataset):
     annotations (otherwise, derive from `AbstractDataset`).
     """
-    BUILDING: ClassVar[list["DatasetWrapper"]] = []
-    """Currently built dataset"""
     def __init__(self, annotation, t: type):
         self.config = None
         self.repository: Optional[Repository] = None
@@ -287,13 +282,22 @@ class DatasetWrapper(AbstractDataset):
         repository, components = DataDefinition.repository_relpath(t)
         super().__init__(repository)
+        self.module_name = None
+        if repository is None:
+            # Try to find the module name
+            self.module_name, _ = t.__module__.split(".", 1)
         # Set some variables
         self.url = annotation.url
         self.doi = annotation.doi
         # Builds the ID:
         # Removes module_name.config prefix
-        if annotation.id is None or annotation.id == "":
+        if (
+            (annotation.id is None)
+            or (annotation.id == "")
+            or ("." not in annotation.id)
+        ):
             # Computes an ID
             assert (
                 # id is empty string = use the module id
@@ -303,7 +307,15 @@ class DatasetWrapper(AbstractDataset):
                 "A @dataset without `id` should be in the "
                 f".config module (not {t.__module__})"
             )
-            path = ".".join(components[1:-1])
+            if annotation.id is None:
+                # There is nothing, use the full path
+                path = ".".join(components[1:])
+            else:
+                # Replace
+                path = ".".join(components[1:-1])
+                if annotation.id != "":
+                    path = f"{path}.{annotation.id}"
             self.id = path
         else:
@@ -361,12 +373,6 @@ class DatasetWrapper(AbstractDataset):
             self._prepare()
         return super().download(force=force)
-    @contextmanager
-    def building(self):
-        DatasetWrapper.BUILDING.append(self)
-        yield self
-        DatasetWrapper.BUILDING.pop()
     def _prepare(self) -> "Base":
         if self.config is not None:
             return self.config
@@ -378,8 +384,7 @@ class DatasetWrapper(AbstractDataset):
         # Construct the object
         resources = {key: value.prepare() for key, value in self.resources.items()}
-        with self.building():
-            result = self.t(**resources)
+        result = self.t(**resources)
         # Download resources
         logging.debug("Building with data type %s and dataset %s", self.base, self.t)
@@ -425,18 +430,11 @@ class DatasetWrapper(AbstractDataset):
     @property
     def datapath(self):
         """Returns the destination path for downloads"""
-        from datamaestro import Context  # noqa: F811
+        if self.repository is not None:
+            return self.repository.datapath / self._path
-        path = Context.instance().storepath / self._path
-        if (self.repository is not None) and (not path.exists()):
-            old_path: Path = self.repository.datapath / self._path
-            if old_path.exists():
-                logging.info(
-                    "Moving from old path [%s] to new path [%s]", old_path, path
-                )
-                path.parent.mkdir(exist_ok=True, parents=True)
-                old_path.rename(path)
+        # No repository, use __custom__/[MODULE NAME]
+        path = self.context.datapath / "__custom__" / self.module_name / self._path
         return path
@@ -571,13 +569,15 @@ class dataset:
             timestamp {bool} -- If the dataset evolves, specify its timestamp
             (default: None)
-            id {[type]} -- [description] (default: {None})
+            id {[type]} -- [description] (default: {None}) Gives the full ID of
+            the dataset if it contains a ., or just the last component otherwise
             url {[type]} -- [description] (default: {None})
             size {str} -- The size (should be a parsable format)
             doi {str} -- The DOI of the corresponding paper
         """
         if hasattr(base, "__datamaestro__") and isinstance(
             base.__datamaestro__, metadataset

datamaestro/download/__init__.py CHANGED Viewed

@@ -31,7 +31,7 @@ class Resource(DatasetAnnotation, ABC):
         self.varname = varname
         # Ensures that the object is initialized
         self._post = False
-        self.definition = None
+        self.definition: AbstractDataset = None
     def annotate(self, dataset: AbstractDataset):
         assert self.definition is None
@@ -45,9 +45,9 @@ class Resource(DatasetAnnotation, ABC):
     def contextualize(self):
         """When using an annotation inline, uses the current dataset wrapper object"""
-        from datamaestro.definitions import DatasetWrapper
+        from datamaestro.definitions import AbstractDataset
-        wrapper = DatasetWrapper.BUILDING[-1]
+        wrapper = AbstractDataset.processing()
         self.annotate(wrapper)
     @property

datamaestro/download/custom.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import Protocol
 from pathlib import Path
 from datamaestro import Context
-from datamaestro.definitions import DatasetWrapper
 from datamaestro.download import Resource
@@ -10,20 +9,13 @@ class Downloader(Protocol):
         pass
-class CustomResource(Resource):
-    def __init__(self, ds_wrapper: DatasetWrapper, downloader: Downloader):
-        self.ds_wrapper = ds_wrapper
+class custom_download(Resource):
+    def __init__(self, varname: str, downloader: Downloader):
+        super().__init__(varname)
         self.downloader = downloader
     def prepare(self):
-        pass
+        return self.definition.datapath
     def download(self, force=False):
-        self.downloader(self.context, self.ds_wrapper.datapath, force=force)
-def custom_download(downloader: Downloader) -> Path:
-    ds_wrapper = DatasetWrapper.BUILDING[-1]
-    ds_wrapper.ordered_resources.append(CustomResource(ds_wrapper, downloader))
-    return ds_wrapper.datapath
+        self.downloader(self.context, self.definition.datapath, force=force)

datamaestro/download/huggingface.py CHANGED Viewed

@@ -5,7 +5,7 @@ from datamaestro.download import Download
 class hf_download(Download):
-    """Use Hugging Face to donwload a file"""
+    """Use Hugging Face to download a file"""
     def __init__(
         self,

datamaestro/download/single.py CHANGED Viewed

@@ -9,7 +9,7 @@ import os
 import urllib3
 from pathlib import Path
 import re
-from datamaestro.utils import copyfileobjs, FileChecker
+from datamaestro.utils import copyfileobjs
 from datamaestro.stream import Transform
 from datamaestro.download import Download
@@ -35,7 +35,7 @@ class SingleDownload(Download):
         return self.path
     def download(self, force=False):
-        if not self.path.is_file():
+        if not self.path.is_file() and not force:
             self._download(self.path)
@@ -96,20 +96,6 @@ class filedownloader(SingleDownload):
         logging.info("Created file %s" % destination)
-def file_from_url(
-    filename: str,
-    url: str,
-    *,
-    size: Optional[int] = None,
-    transforms: Optional[Transform] = None,
-    checker: Optional[FileChecker] = None,
-) -> Path:
-    """Defines a file that should be downloaded from"""
-    downloader = filedownloader(filename, url, size, transforms, checker)
-    downloader.contextualize()
-    return downloader.path
 class concatdownload(SingleDownload):
     """Concatenate all files in an archive"""

datamaestro/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '1.3.2'
-__version_tuple__ = version_tuple = (1, 3, 2)
+__version__ = version = '1.4.1'
+__version_tuple__ = version_tuple = (1, 4, 1)

{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datamaestro
-Version: 1.3.2
+Version: 1.4.1
 Summary: "Dataset management command line and API"
 Home-page: https://github.com/experimaestro/datamaestro
 Author: Benjamin Piwowarski
@@ -97,22 +97,10 @@ $ datamaestro search tag:image
 [image] com.lecun.mnist
 $ datamaestro prepare com.lecun.mnist
-INFO:root:Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz into /home/bpiwowar/datamaestro/data/image/com/lecun/mnist/t10k-labels-idx1-ubyte
-INFO:root:Transforming file
-INFO:root:Created file /home/bpiwowar/datamaestro/data/image/com/lecun/mnist/t10k-labels-idx1-ubyte
-INFO:root:Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz into /home/bpiwowar/datamaestro/data/image/com/lecun/mnist/t10k-images-idx3-ubyte
-INFO:root:Transforming file
-INFO:root:Created file /home/bpiwowar/datamaestro/data/image/com/lecun/mnist/t10k-images-idx3-ubyte
-INFO:root:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz into /home/bpiwowar/datamaestro/data/image/com/lecun/mnist/train-labels-idx1-ubyte
-INFO:root:Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
-Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz: 32.8kB [00:00, 92.1kB/s]                                                            INFO:root:Transforming file
-INFO:root:Created file /home/bpiwowar/datamaestro/data/image/com/lecun/mnist/train-labels-idx1-ubyte
-INFO:root:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz into /home/bpiwowar/datamaestro/data/image/com/lecun/mnist/train-images-idx3-ubyte
-INFO:root:Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
-Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz: 9.92MB [00:00, 10.6MB/s]
-INFO:root:Transforming file
-INFO:root:Created file /home/bpiwowar/datamaestro/data/image/com/lecun/mnist/train-images-idx3-ubyte
-...JSON...
+INFO:root:Materializing 4 resources
+INFO:root:Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz into .../datamaestro/store/com/lecun/train_images.idx
+INFO:root:Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz into .../datamaestro/store/com/lecun/test_images.idx
+INFO:root:Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz into .../datamaestro/store/com/lecun/test_labels.idx
 ```
 The previous command also returns a JSON on standard output
@@ -158,13 +146,12 @@ and is integrated with [experimaestro](http://experimaestro.github.io/experimaes
 Its syntax is described in the [documentation](https://datamaestro.readthedocs.io).
-For MNIST, this corresponds to.
+For instance, the MNIST dataset can be described by the following
 ```python
-from datamaestro_image.data import ImageClassification, LabelledImages, Base, IDXImage
-from datamaestro.download.single import filedownloader
-from datamaestro.definitions import  argument, datatasks, datatags, dataset
-from datamaestro.data.tensor import IDX
+from datamaestro import dataset
+from datamaestro.download.single import download_file
+from datamaestro_image.data import ImageClassification, LabelledImages, IDXImage
 @filedownloader("train_images.idx", "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
@@ -175,26 +162,37 @@ from datamaestro.data.tensor import IDX
   ImageClassification,
   url="http://yann.lecun.com/exdb/mnist/",
 )
-def MNIST(train_images, train_labels, test_images, test_labels):
-  """The MNIST database
-  The MNIST database of handwritten digits, available from this page, has a
-  training set of 60,000 examples, and a test set of 10,000 examples. It is a
-  subset of a larger set available from NIST. The digits have been
-  size-normalized and centered in a fixed-size image.
-  """
-  return {
-    "train": LabelledImages(
-      images=IDXImage(path=train_images),
-      labels=IDX(path=train_labels)
-    ),
-    "test": LabelledImages(
-      images=IDXImage(path=test_images),
-      labels=IDX(path=test_labels)
-    ),
-  }
+    return ImageClassification(
+        train=LabelledImages(
+            images=IDXImage(path=train_images), labels=IDXImage(path=train_labels)
+        ),
+        test=LabelledImages(
+            images=IDXImage(path=test_images), labels=IDXImage(path=test_labels)
+        ),
+    )
+```
+When building dataset modules, some extra documentation can be provided:
+```yaml
+  ids: [com.lecun.mnist]
+  entry_point: "datamaestro_image.config.com.lecun:mnist"
+  title: The MNIST database
+  url: http://yann.lecun.com/exdb/mnist/
+  groups: [image-classification]
+  description: |
+    The MNIST database of handwritten digits, available from this page,
+    has a training set of 60,000 examples, and a test set of 10,000
+    examples. It is a subset of a larger set available from NIST. The
+    digits have been size-normalized and centered in a fixed-size image.
 ```
+This makes it possible to:
+1. Document the dataset
+2. Use the command line interface to manipulate it (download resources, etc.)
 # 0.8.0
 - Integration with other repositories: abstracting away the notion of dataset

{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/RECORD RENAMED Viewed

@@ -1,14 +1,14 @@
-datamaestro/__init__.py,sha256=gnbxrPFzIuG4oR2Qrw9UYS0SNVsf4yCtqNvzSjstdak,376
-datamaestro/__main__.py,sha256=tJTf1sTWKRIatvBcHlWDIZRZodAZ2B2zkD01pD89MYk,9024
-datamaestro/context.py,sha256=S7sQ6RQVLjtoY5iyAikfyvfbqoaoDzcHt4-js8t6mMg,13653
-datamaestro/definitions.py,sha256=HEnwB32Reb4ouLOjboEOe_j88keBZPQ0SU6OrO_ohLU,18764
+datamaestro/__init__.py,sha256=LR8nx7H3Fo97O0gJXV2PxQezsmSTDLAg_nQEXB5QAjc,322
+datamaestro/__main__.py,sha256=2p36ZcJcZAL9NZBUkMaYRUhKyqhheVPXMGw6K1KNwhk,9196
+datamaestro/context.py,sha256=KsXYNTt4xX4zEVrnd2hciP7PVCh1StRzjU1Ih6VeCtU,13532
+datamaestro/definitions.py,sha256=EyrN24HcQmW_pS2K5hGRF07eJ36mQDFduIGvHmMSzsk,18825
 datamaestro/record.py,sha256=m3WGsPcZ1LouQXNJOBUK3QusAIRiuy6T_oqhq09-Ckg,5504
 datamaestro/registry.py,sha256=M7QJkcWJP_cxAoqIioLQ01ou2Zg9RqGQvW0XGVspYFE,1421
 datamaestro/search.py,sha256=bRT-91-2VJJ2JSfNaS1mzaVfqq_HMVBVs-RBj0w-ypM,2906
 datamaestro/settings.py,sha256=HYSElTUYZ6DZocBb9o3ifm6WW9knRO64XJUwxGIpvwQ,1304
 datamaestro/sphinx.py,sha256=bp7x_2BFoTSwTqcVZDM8R8cWa7G2pz0Zb8GS054lLYM,6996
 datamaestro/utils.py,sha256=9m-AVVww6InAZfGFiGy6XJzfExpYNqH1fhWQEezjafA,6536
-datamaestro/version.py,sha256=2MIKMHG_bp3KmQVZwa0rSvoTHIRfxwkSxFOUhMK4eQc,511
+datamaestro/version.py,sha256=2wP77AlenYjrtKg1nXf5noV1SfpanFafZAGSe7wvBys,511
 datamaestro/annotations/__init__.py,sha256=jLprrxSBa5QIqc--vqycEcxU4CR9WjVNRaqR5lH0EuE,39
 datamaestro/annotations/agreement.py,sha256=xEH0ddZxdJ_oG_150PoOa-WjY_OaeQja3FzMzY5IB6k,955
 datamaestro/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,16 +19,14 @@ datamaestro/data/csv.py,sha256=jcXFVBOEQoSi3YL60bqtwjCf2YXHboaMpUmiXZpzuPM,2506
 datamaestro/data/huggingface.py,sha256=rCMiMqVgNI9zRAgm9PYnbwb7musYryBoIP3HuJmH4sg,691
 datamaestro/data/ml.py,sha256=7Rv4Tb9g17HDj8mOBJpIDjgolGQAd5Wrb0mHlnm-bPE,709
 datamaestro/data/tensor.py,sha256=in36UQz4cdUEVmCS62pInu9RNekohRON667Z_JqNdhk,2254
-datamaestro/datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamaestro/datasets/yaml_repository.py,sha256=X5JjA2dQ5xfdYSUgL2EbZhrOYn-FPiBOAK97kw4kwqo,2533
-datamaestro/download/__init__.py,sha256=XcRw9acAq1IwhLQZpj2HpMNEaMesA5BbllJpbRCkOwA,2846
+datamaestro/download/__init__.py,sha256=EBoAcw2wErS8ymEYs7LJKez4UO-Gwhe4YgqRAysOxRY,2865
 datamaestro/download/archive.py,sha256=G-2gzepknqT7Us3naMGAApGVGJMeHQIxM-tSpaa9ark,5608
-datamaestro/download/custom.py,sha256=2-gFoOgQ8J93HjH9sc7u6wjVYm7DmSytP1ty2O6-d8k,839
-datamaestro/download/huggingface.py,sha256=LkzmZo2Z0yccqAfj7di7jDNGFrMKN9m8IM8SfexOomY,1125
+datamaestro/download/custom.py,sha256=DUjDVAWuHC6sV_apMQb44Yjd6HUXkHY6Ob52FQY3t-M,587
+datamaestro/download/huggingface.py,sha256=b4Y437ATYrugdkvqZrPQmqiXXSrmYyqEKDVI0wnIGDE,1125
 datamaestro/download/links.py,sha256=GFnq_AzI_uen7JBuGWD9qveeC9QFBWDrSnj7pOcwWwM,3352
 datamaestro/download/manual.py,sha256=-T2QWxKAiN3ZbSujjQUVeWDEDFonw9VnlzCfBIHcLao,190
 datamaestro/download/multiple.py,sha256=Mrr0ObHM5cE1CPSHE9PKIrox3qZVgxwRyxLzNXp0LqM,2159
-datamaestro/download/single.py,sha256=bMDLldvODp2ZXyxXeKLT4qbL-v4igA6A7HVjIt2Cf8c,4526
+datamaestro/download/single.py,sha256=fCIfZdR14YN09MQTgcxL21PWu5CjELfIClgWjFpR5mg,4148
 datamaestro/download/sync.py,sha256=Z_LsXj4kbZWIYKTVJZEhfdpYiv6wXOOIyw8LahmEcqs,836
 datamaestro/download/todo.py,sha256=y3YnmWC_i-u23ce-vreIwIXZcoO-uA0HXErgJPThnco,256
 datamaestro/download/wayback.py,sha256=B9X1P9jElvd_qnUs9aX0TAO-NrNyvuHLYDAcpNq354w,5430
@@ -42,9 +40,9 @@ datamaestro/test/conftest.py,sha256=it4S5Qq1CA_U8qM0pr4m7v-1dhLj5Y49WjVg5Ee3mpM,
 datamaestro/test/test_annotations.py,sha256=XUjDWb3FJimSD91wcItJ0lLwTBmvN4wVu_EgTKSvV2c,278
 datamaestro/test/test_download_handlers.py,sha256=-Gofr89zqIyeI8C4rZqfYR3JfiZVImdcSz9s6q361zQ,641
 datamaestro/test/test_record.py,sha256=hNZ3uo2i5FZ0VsOHRwvLO1Z6Zce92PdipAF65UptPB8,1156
-datamaestro-1.3.2.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
-datamaestro-1.3.2.dist-info/METADATA,sha256=1RJCcSxd3VdZ1VOMrVlQEA_cQuCBbFGC-fB1NjOWVPY,8990
-datamaestro-1.3.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-datamaestro-1.3.2.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
-datamaestro-1.3.2.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
-datamaestro-1.3.2.dist-info/RECORD,,
+datamaestro-1.4.1.dist-info/licenses/LICENSE,sha256=WJ7YI-moTFb-uVrFjnzzhGJrnL9P2iqQe8NuED3hutI,35141
+datamaestro-1.4.1.dist-info/METADATA,sha256=jGy6z11AvalmLQuwby5XSEViOS55DtMfq21fhs_rW14,8189
+datamaestro-1.4.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+datamaestro-1.4.1.dist-info/entry_points.txt,sha256=8qMhwSRvFG2iBqtJYVD22Zd4s4c3YkODtcp0Ajw1knw,133
+datamaestro-1.4.1.dist-info/top_level.txt,sha256=XSznaMNAA8jELV7-TOqaAgDsjLzUf9G9MxL7C4helT0,12
+datamaestro-1.4.1.dist-info/RECORD,,

datamaestro/datasets/__init__.py DELETED Viewed

File without changes

datamaestro/datasets/yaml_repository.py DELETED Viewed

@@ -1,103 +0,0 @@
-import re
-from typing import Iterator, Optional
-from functools import cached_property
-from attrs import field
-import importlib
-from omegaconf import OmegaConf
-from functools import partial
-from attrs import define
-from datamaestro import BaseRepository
-from datamaestro.definitions import AbstractDataset, DatasetWrapper
-from datamaestro.data import Base
-re_spec = re.compile(r"""^(\w\.)+:(\w+)""")
-@define
-class RepositoryDataset:
-    ids: list[str]
-    """ID(s) of this dataset"""
-    entry_point: str = field(validator=re_spec.match)
-    """The entry point"""
-    title: str
-    """The full name of the dataset"""
-    description: str
-    """Description of the dataset"""
-    url: Optional[str]
-    """The URL"""
-    groups: Optional[list[str]]
-    """Groups to which this repository belongs"""
-@define
-class RepositoryAuthors:
-    name: str
-    email: str
-@define
-class RepositoryGroup:
-    name: str
-    tasks: list[str]
-    tags: list[str]
-@define
-class RepositoryConfiguration:
-    namespace: str
-    authors: list[RepositoryAuthors]
-    description: str
-    groups: dict[str, RepositoryGroup]
-    datasets: list[RepositoryDataset]
-class YAMLDataset(AbstractDataset):
-    def __init__(self, repository: "YAMLRepository", information: RepositoryDataset):
-        super().__init__(repository)
-        self.information = information
-        self.id = self.information.ids[0]
-        self.aliases = set(self.information.ids)
-    @cached_property
-    def wrapper(self) -> DatasetWrapper:
-        module, func_name = self.information.entry_point.split(":")
-        wrapper = getattr(importlib.import_module(module), func_name)
-        return wrapper
-    def _prepare(self) -> "Base":
-        return self.wrapper()
-    def download(self, **kwargs):
-        return self.wrapper.download(**kwargs)
-class YAMLRepository(BaseRepository):
-    """YAML-based repository"""
-    @property
-    def id(self):
-        return self.configuration.namespace
-    @property
-    def name(self):
-        return self.configuration.namespace
-    @cached_property
-    def configuration(self):
-        schema = OmegaConf.structured(RepositoryConfiguration)
-        with importlib.resources.path(
-            self.__class__.__module__, "datamaestro.yaml"
-        ) as fp:
-            conf = OmegaConf.load(fp)
-        conf: RepositoryConfiguration = OmegaConf.merge(schema, conf)
-        return conf
-    def __iter__(self) -> Iterator["AbstractDataset"]:
-        return map(partial(YAMLDataset, self), self.configuration.datasets)

{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{datamaestro-1.3.2.dist-info → datamaestro-1.4.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

datamaestro-1.3.2-py3-none-any.whl → datamaestro-1.4.1-py3-none-any.whl

datamaestro-1.3.2-py3-none-any.whl → datamaestro-1.4.1-py3-none-any.whl