anemoi-datasets 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,64 +8,20 @@
 # nor does it submit to any jurisdiction.
 #
 
-
-import argparse
-import logging
-import sys
-import traceback
+from anemoi.utils.cli import cli_main
+from anemoi.utils.cli import make_parser
 
 from . import __version__
 from .commands import COMMANDS
 
-LOG = logging.getLogger(__name__)
-
-
-def main():
-    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
-
-    parser.add_argument(
-        "--version",
-        "-V",
-        action="store_true",
-        help="show the version and exit",
-    )
-    parser.add_argument(
-        "--debug",
-        "-d",
-        action="store_true",
-        help="Debug mode",
-    )
 
-    subparsers = parser.add_subparsers(help="commands:", dest="command")
-    for name, command in COMMANDS.items():
-        command_parser = subparsers.add_parser(name, help=command.__doc__)
-        command.add_arguments(command_parser)
+# For read-the-docs
+def create_parser():
+    return make_parser(__doc__, COMMANDS)
 
-    args = parser.parse_args()
 
-    if args.version:
-        print(__version__)
-        return
-
-    if args.command is None:
-        parser.print_help()
-        return
-
-    cmd = COMMANDS[args.command]
-
-    logging.basicConfig(
-        format="%(asctime)s %(levelname)s %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S",
-        level=logging.DEBUG if args.debug else logging.INFO,
-    )
-
-    try:
-        cmd.run(args)
-    except ValueError as e:
-        traceback.print_exc()
-        LOG.error("\n💣 %s", str(e).lstrip())
-        LOG.error("💣 Exiting")
-        sys.exit(1)
+def main():
+    cli_main(__version__, __doc__, COMMANDS)
 
 
 if __name__ == "__main__":
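
The hunk above (anemoi/datasets/__main__.py, judging by the `if __name__ == "__main__":` guard and the sha256 change for that file in the RECORD diff at the end) replaces hand-rolled argparse boilerplate with two helpers from anemoi-utils; the new sphinx-argparse dependency in the METADATA diff below presumably consumes create_parser() for the documentation. A minimal, self-contained sketch of what the resulting entry point does, assuming make_parser builds one sub-parser per command and cli_main parses and dispatches — the removed code above is the best guide to these helpers, whose real implementations live in anemoi.utils.cli:

    # Hypothetical standalone equivalent of the new entry point.
    import argparse
    import sys


    def make_parser(doc, commands):
        # One sub-parser per registered command, mirroring the removed code.
        parser = argparse.ArgumentParser(description=doc)
        parser.add_argument("--version", "-V", action="store_true")
        subparsers = parser.add_subparsers(dest="command")
        for name, command in commands.items():
            command.add_arguments(subparsers.add_parser(name, help=command.__doc__))
        return parser


    def cli_main(version, doc, commands):
        args = make_parser(doc, commands).parse_args()
        if args.version:
            print(version)
            return
        if args.command is None:
            sys.exit("No command given")
        commands[args.command].run(args)
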
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.2.1'
-__version_tuple__ = version_tuple = (0, 2, 1)
+__version__ = version = '0.3.1'
+__version_tuple__ = version_tuple = (0, 3, 1)
@@ -8,69 +8,15 @@
 # nor does it submit to any jurisdiction.
 #
 
-import argparse
-import importlib
-import logging
 import os
-import sys
 
-LOG = logging.getLogger(__name__)
+from anemoi.utils.cli import Command
+from anemoi.utils.cli import Failed
+from anemoi.utils.cli import register_commands
 
+__all__ = ["Command"]
 
-def register(here, package, select, fail=None):
-    result = {}
-    not_available = {}
-
-    for p in os.listdir(here):
-        full = os.path.join(here, p)
-        if p.startswith("_"):
-            continue
-        if not (p.endswith(".py") or (os.path.isdir(full) and os.path.exists(os.path.join(full, "__init__.py")))):
-            continue
-
-        name, _ = os.path.splitext(p)
-
-        try:
-            imported = importlib.import_module(
-                f".{name}",
-                package=package,
-            )
-        except ImportError as e:
-            not_available[name] = e
-            continue
-
-        obj = select(imported)
-        if obj is not None:
-            result[name] = obj
-
-    for name, e in not_available.items():
-        if fail is None:
-            pass
-        if callable(fail):
-            result[name] = fail(name, e)
-
-    return result
-
-
-class Command:
-    def run(self, args):
-        raise NotImplementedError(f"Command not implemented: {args.command}")
-
-
-class Failed(Command):
-    def __init__(self, name, error):
-        self.name = name
-        self.error = error
-
-    def add_arguments(self, command_parser):
-        command_parser.add_argument("x", nargs=argparse.REMAINDER)
-
-    def run(self, args):
-        print(f"Command '{self.name}' not available: {self.error}")
-        sys.exit(1)
-
-
-COMMANDS = register(
+COMMANDS = register_commands(
     os.path.dirname(__file__),
     __name__,
     lambda x: x.command(),
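
The commands package keeps the same plugin-discovery contract, but the scaffolding (register, Command, Failed) now comes from anemoi.utils.cli. A hedged sketch of how a command module plugs in, assuming register_commands behaves like the removed register above (import each sibling module, collect module.command(), map import failures to Failed); the Hello command below is made up for illustration:

    # Hypothetical command module, e.g. anemoi/datasets/commands/hello.py
    # (made-up file); register_commands would pick it up via `lambda x: x.command()`.
    from . import Command


    class Hello(Command):
        """Say hello."""

        def add_arguments(self, command_parser):
            command_parser.add_argument("name")

        def run(self, args):
            print(f"Hello, {args.name}")


    command = Hello  # same pattern as `command = Copy` in copy.py below
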
@@ -41,24 +41,19 @@ zinfo https://object-store.os-api.cci1.ecmwf.int/
 """
 
 
-class CopyMixin:
-    internal = True
-    timestamp = True
-
-    def add_arguments(self, command_parser):
-        command_parser.add_argument("--transfers", type=int, default=8)
-        command_parser.add_argument("--block-size", type=int, default=100)
-        command_parser.add_argument("--overwrite", action="store_true")
-        command_parser.add_argument("--progress", action="store_true")
-        command_parser.add_argument("--nested", action="store_true", help="Use ZARR's nested directpry backend.")
-        command_parser.add_argument(
-            "--rechunk",
-            nargs="+",
-            help="Rechunk given array.",
-            metavar="array=i,j,k,l",
-        )
-        command_parser.add_argument("source")
-        command_parser.add_argument("target")
+class Copier:
+    def __init__(self, source, target, transfers, block_size, overwrite, resume, progress, nested, rechunk, **kwargs):
+        self.source = source
+        self.target = target
+        self.transfers = transfers
+        self.block_size = block_size
+        self.overwrite = overwrite
+        self.resume = resume
+        self.progress = progress
+        self.nested = nested
+        self.rechunk = rechunk
+
+        self.rechunking = rechunk.split(",") if rechunk else []
 
     def _store(self, path, nested=False):
         if nested:
@@ -67,30 +62,56 @@ class CopyMixin:
             return zarr.storage.NestedDirectoryStore(path)
         return path
 
-    def copy_chunk(self, n, m, source, target, block_size, _copy, progress):
+    def copy_chunk(self, n, m, source, target, _copy, progress):
         if _copy[n:m].all():
             LOG.info(f"Skipping {n} to {m}")
             return None
 
-        for i in tqdm.tqdm(
-            range(n, m),
-            desc=f"Copying {n} to {m}",
-            leave=False,
-            disable=not isatty and not progress,
-        ):
-            target[i] = source[i]
+        if self.block_size % self.data_chunks[0] == 0:
+            target[slice(n, m)] = source[slice(n, m)]
+        else:
+            LOG.warning(
+                f"Block size ({self.block_size}) is not a multiple of target chunk size ({self.data_chunks[0]}). Slow copy expected."
+            )
+            if self.transfers > 1:
+                # race condition, different threads might copy the same data to the same chunk
+                raise NotImplementedError(
+                    "Block size is not a multiple of target chunk size. Parallel copy not supported."
+                )
+            for i in tqdm.tqdm(
+                range(n, m),
+                desc=f"Copying {n} to {m}",
+                leave=False,
+                disable=not isatty and not progress,
+            ):
+                target[i] = source[i]
+
         return slice(n, m)
 
-    def copy_data(self, source, target, transfers, block_size, _copy, progress, rechunking):
+    def parse_rechunking(self, rechunking, source_data):
+        shape = source_data.shape
+        chunks = list(source_data.chunks)
+        for i, c in enumerate(rechunking):
+            if not c:
+                continue
+            elif c == "full":
+                chunks[i] = shape[i]
+            c = int(c)
+            c = min(c, shape[i])
+            chunks[i] = c
+        chunks = tuple(chunks)
+
+        if chunks != source_data.chunks:
+            LOG.info(f"Rechunking data from {source_data.chunks} to {chunks}")
+            # if self.transfers > 1:
+            #     raise NotImplementedError("Rechunking with multiple transfers is not implemented")
+        return chunks
+
+    def copy_data(self, source, target, _copy, progress):
         LOG.info("Copying data")
         source_data = source["data"]
 
-        chunks = list(source_data.chunks)
-        if "data" in rechunking:
-            assert len(chunks) == len(rechunking["data"]), (chunks, rechunking["data"])
-            for i, c in enumerate(rechunking["data"]):
-                if c != -1:
-                    chunks[i] = c
+        self.data_chunks = self.parse_rechunking(self.rechunking, source_data)
 
         target_data = (
             target["data"]
@@ -98,12 +119,12 @@ class CopyMixin:
             else target.create_dataset(
                 "data",
                 shape=source_data.shape,
-                chunks=chunks,
+                chunks=self.data_chunks,
                 dtype=source_data.dtype,
             )
         )
 
-        executor = ThreadPoolExecutor(max_workers=transfers)
+        executor = ThreadPoolExecutor(max_workers=self.transfers)
         tasks = []
         n = 0
         while n < target_data.shape[0]:
@@ -111,15 +132,14 @@
                 executor.submit(
                     self.copy_chunk,
                     n,
-                    min(n + block_size, target_data.shape[0]),
+                    min(n + self.block_size, target_data.shape[0]),
                     source_data,
                     target_data,
-                    block_size,
                     _copy,
                     progress,
                 )
             )
-            n += block_size
+            n += self.block_size
 
         for future in tqdm.tqdm(as_completed(tasks), total=len(tasks), smoothing=0):
             copied = future.result()
@@ -131,7 +151,7 @@ class CopyMixin:
 
         LOG.info("Copied data")
 
-    def copy_array(self, name, source, target, transfers, block_size, _copy, progress, rechunking):
+    def copy_array(self, name, source, target, _copy, progress):
         for k, v in source.attrs.items():
             target.attrs[k] = v
 
@@ -139,14 +159,14 @@
             return
 
         if name == "data":
-            self.copy_data(source, target, transfers, block_size, _copy, progress, rechunking)
+            self.copy_data(source, target, _copy, progress)
             return
 
         LOG.info(f"Copying {name}")
         target[name] = source[name]
         LOG.info(f"Copied {name}")
 
-    def copy_group(self, source, target, transfers, block_size, _copy, progress, rechunking):
+    def copy_group(self, source, target, _copy, progress):
         import zarr
 
         for k, v in source.attrs.items():
@@ -158,25 +178,19 @@
                 self.copy_group(
                     source[name],
                     group,
-                    transfers,
-                    block_size,
                     _copy,
                     progress,
-                    rechunking,
                 )
             else:
                 self.copy_array(
                     name,
                     source,
                     target,
-                    transfers,
-                    block_size,
                     _copy,
                     progress,
-                    rechunking,
                 )
 
-    def copy(self, source, target, transfers, block_size, progress, rechunking):
+    def copy(self, source, target, progress):
         import zarr
 
         if "_copy" not in target:
@@ -187,32 +201,26 @@
         _copy = target["_copy"]
         _copy_np = _copy[:]
 
-        self.copy_group(source, target, transfers, block_size, _copy_np, progress, rechunking)
+        self.copy_group(source, target, _copy_np, progress)
         del target["_copy"]
 
-    def run(self, args):
+    def run(self):
         import zarr
 
         # base, ext = os.path.splitext(os.path.basename(args.source))
         # assert ext == ".zarr", ext
         # assert "." not in base, base
-        LOG.info(f"Copying {args.source} to {args.target}")
-
-        rechunking = {}
-        if args.rechunk:
-            for r in args.rechunk:
-                k, v = r.split("=")
-                if k != "data":
-                    raise ValueError(f"Only rechunking data is supported: {k}")
-                values = v.split(",")
-                values = [-1 if x == "" else x for x in values]
-                values = tuple(int(x) for x in values)
-                rechunking[k] = values
-            for k, v in rechunking.items():
-                LOG.info(f"Rechunking {k} to {v}")
-
-        try:
-            target = zarr.open(self._store(args.target, args.nested), mode="r")
+        LOG.info(f"Copying {self.source} to {self.target}")
+
+        def target_exists():
+            try:
+                zarr.open(self._store(self.target), mode="r")
+                return True
+            except ValueError:
+                return False
+
+        def target_finished():
+            target = zarr.open(self._store(self.target), mode="r")
             if "_copy" in target:
                 done = sum(1 if x else 0 for x in target["_copy"])
                 todo = len(target["_copy"])
@@ -222,26 +230,76 @@
                     todo,
                     int(done / todo * 100 + 0.5),
                 )
+                return False
             elif "sums" in target and "data" in target:  # sums is copied last
-                LOG.error("Target already exists")
-                return
-        except ValueError as e:
-            LOG.info(f"Target does not exist: {e}")
-            pass
-
-        source = zarr.open(self._store(args.source), mode="r")
-        if args.overwrite:
-            target = zarr.open(self._store(args.target, args.nested), mode="w")
-        else:
-            try:
-                target = zarr.open(self._store(args.target, args.nested), mode="w+")
-            except ValueError:
-                target = zarr.open(self._store(args.target, args.nested), mode="w")
-        self.copy(source, target, args.transfers, args.block_size, args.progress, rechunking)
+                return True
+            return False
+
+        def open_target():
+
+            if not target_exists():
+                return zarr.open(self._store(self.target, self.nested), mode="w")
+
+            if self.overwrite:
+                LOG.error("Target already exists, overwriting.")
+                return zarr.open(self._store(self.target, self.nested), mode="w")
+
+            if self.resume:
+                if target_finished():
+                    LOG.error("Target already exists and is finished.")
+                    sys.exit(0)
+
+                LOG.error("Target already exists, resuming copy.")
+                return zarr.open(self._store(self.target, self.nested), mode="w+")
+
+            LOG.error("Target already exists, use either --overwrite or --resume.")
+            sys.exit(1)
+
+        target = open_target()
+
+        assert target is not None, target
+
+        source = zarr.open(self._store(self.source), mode="r")
+        self.copy(source, target, self.progress)
+
+
+class CopyMixin:
+    internal = True
+    timestamp = True
+
+    def add_arguments(self, command_parser):
+        group = command_parser.add_mutually_exclusive_group()
+        group.add_argument(
+            "--overwrite",
+            action="store_true",
+            help="Overwrite existing dataset. This will delete the target dataset if it already exists. Cannot be used with --resume.",
+        )
+        group.add_argument(
+            "--resume", action="store_true", help="Resume copying an existing dataset. Cannot be used with --overwrite."
+        )
+        command_parser.add_argument("--transfers", type=int, default=8, help="Number of parallel transfers.")
+        command_parser.add_argument(
+            "--progress", action="store_true", help="Force show progress bar, even if not in an interactive shell."
+        )
+        command_parser.add_argument("--nested", action="store_true", help="Use ZARR's nested directpry backend.")
+        command_parser.add_argument(
+            "--rechunk", help="Rechunk the target data array. Rechunk size should be a diviser of the block size."
+        )
+        command_parser.add_argument(
+            "--block-size",
+            type=int,
+            default=100,
+            help="For optimisation purposes, data is transfered by blocks. Default is 100.",
+        )
+        command_parser.add_argument("source", help="Source location.")
+        command_parser.add_argument("target", help="Target location.")
+
+    def run(self, args):
+        Copier(**vars(args)).run()
 
 
 class Copy(CopyMixin, Command):
-    pass
+    """Copy a dataset from one location to another."""
 
 
 command = Copy
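
The CLI surface of the copy command stays in CopyMixin, while the work moves to the new Copier class, which CopyMixin.run constructs from vars(args). A sketch of equivalent programmatic use, assuming Copier is importable as shown (the paths are made up):

    # Hypothetical programmatic use, mirroring CopyMixin.run(args) above.
    from anemoi.datasets.commands.copy import Copier

    Copier(
        source="/data/source.zarr",   # made-up paths
        target="/data/target.zarr",
        transfers=8,                  # the argparse defaults above
        block_size=100,
        overwrite=False,
        resume=True,                  # pick up a partially copied target
        progress=False,
        nested=False,
        rechunk=None,
    ).run()

The --overwrite/--resume pair replaces the old silent fallback from mode "w+" to mode "w": a pre-existing target is now an explicit error unless one of the two flags is given.
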
@@ -4,13 +4,24 @@ from . import Command
 
 
 class Create(Command):
+    """Create a dataset."""
+
     internal = True
     timestamp = True
 
     def add_arguments(self, command_parser):
-        command_parser.add_argument("--overwrite", action="store_true", help="Overwrite existing files")
-        command_parser.add_argument("config", help="Configuration file")
-        command_parser.add_argument("path", help="Path to store the created data")
+        command_parser.add_argument(
+            "--overwrite",
+            action="store_true",
+            help="Overwrite existing files. This will delete the target dataset if it already exists.",
+        )
+        command_parser.add_argument(
+            "--test",
+            action="store_true",
+            help="Build a small dataset, using only the first dates. And, when possible, using low resolution and less ensemble members.",
+        )
+        command_parser.add_argument("config", help="Configuration yaml file defining the recipe to create the dataset.")
+        command_parser.add_argument("path", help="Path to store the created data.")
 
     def run(self, args):
        kwargs = vars(args)
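
The new --test flag reaches the builder through vars(args); a hedged sketch of the equivalent call, assuming run() forwards the parsed arguments into the Creator class that the create/__init__.py hunks further below modify (the path and config values are made up, and argument names other than overwrite/test are illustrative):

    # Hypothetical: roughly what `create --test recipe.yaml out.zarr` boils down to.
    from anemoi.datasets.create import Creator

    Creator(
        path="out.zarr",       # made-up output path
        config="recipe.yaml",  # made-up recipe file
        overwrite=False,
        test=True,   # small dataset: first dates only, low resolution if possible
    ).init()
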
@@ -11,16 +11,12 @@ import os
 from .. import Command
 from .zarr import InspectZarr
 
-# from .checkpoint import InspectCheckpoint
-
 
 class Inspect(Command, InspectZarr):
-    # class Inspect(Command, InspectCheckpoint, InspectZarr):
-    """Inspect a checkpoint or zarr file."""
+    """Inspect a zarr dataset."""
 
     def add_arguments(self, command_parser):
         # g = command_parser.add_mutually_exclusive_group()
-        # g.add_argument("--inspect", action="store_true", help="Inspect weights")
         command_parser.add_argument("path", metavar="PATH", nargs="+")
         command_parser.add_argument("--detailed", action="store_true")
         # command_parser.add_argument("--probe", action="store_true")
@@ -19,6 +19,7 @@ class Creator:
         print=print,
         statistics_tmp=None,
         overwrite=False,
+        test=None,
         **kwargs,
     ):
         self.path = path  # Output path
@@ -27,6 +28,7 @@ class Creator:
         self.print = print
         self.statistics_tmp = statistics_tmp
         self.overwrite = overwrite
+        self.test = test
 
     def init(self, check_name=False):
         # check path
@@ -43,6 +45,7 @@
             config=self.config,
             statistics_tmp=self.statistics_tmp,
             print=self.print,
+            test=self.test,
         )
         obj.initialise(check_name=check_name)
 
@@ -25,6 +25,7 @@ from anemoi.datasets.dates.groups import Groups
 from .check import DatasetName
 from .check import check_data_values
 from .chunks import ChunkFilter
+from .config import DictObj
 from .config import build_output
 from .config import loader_config
 from .input import build_input
@@ -55,6 +56,8 @@ class GenericDatasetHandler:
         self.path = path
         self.kwargs = kwargs
         self.print = print
+        if "test" in kwargs:
+            self.test = kwargs["test"]
 
     @classmethod
     def from_config(cls, *, config, path, print=print, **kwargs):
@@ -157,7 +160,35 @@ class InitialiserLoader(Loader):
 
         self.tmp_statistics.delete()
 
+        if self.test:
+
+            def test_dates(cfg, n=4):
+                LOG.warn("Running in test mode. Changing the list of dates to use only 4.")
+                groups = Groups(**cfg)
+                dates = groups.dates
+                return dict(start=dates[0], end=dates[n - 1], frequency=dates.frequency, group_by=n)
+
+            self.main_config.dates = test_dates(self.main_config.dates)
+
+            def set_to_test_mode(obj):
+                if isinstance(obj, (list, tuple)):
+                    for v in obj:
+                        set_to_test_mode(v)
+                    return
+                if isinstance(obj, (dict, DictObj)):
+                    if "grid" in obj:
+                        obj["grid"] = "20./20."
+                        LOG.warn(f"Running in test mode. Setting grid to {obj['grid']}")
+                    if "number" in obj:
+                        obj["number"] = obj["number"][0:3]
+                        LOG.warn(f"Running in test mode. Setting number to {obj['number']}")
+                    for k, v in obj.items():
+                        set_to_test_mode(v)
+
+            set_to_test_mode(self.main_config)
+
         LOG.info(self.main_config.dates)
+
         self.groups = Groups(**self.main_config.dates)
 
         self.output = build_output(self.main_config.output, parent=self)
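
A worked example of what the test mode does to a recipe, based only on the code above (the config fragment is made up):

    # Hypothetical recipe fragment before set_to_test_mode:
    config = {
        "input": {
            "grid": "0.25/0.25",           # any nested dict with a "grid" key
            "number": [0, 1, 2, 3, 4, 5],  # ensemble members
        }
    }

    # After set_to_test_mode(config):
    #   config["input"]["grid"]   == "20./20."  (coarse test grid)
    #   config["input"]["number"] == [0, 1, 2]  (first three members only)
    # and test_dates() has already shrunk the dates section to its first
    # four dates, grouped together (group_by=4).
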
@@ -28,9 +28,7 @@ class PersistentDict:
     # Used in parrallel, during data loading,
     # to write data in pickle files.
     def __init__(self, directory, create=True):
-        """dirname: str
-        The directory where the data will be stored.
-        """
+        """dirname: str The directory where the data will be stored."""
         self.dirname = directory
         self.name, self.ext = os.path.splitext(os.path.basename(self.dirname))
         if create:
@@ -26,14 +26,14 @@ LOG = logging.getLogger(__name__)
 
 
 def default_statistics_dates(dates):
-    """
-    Calculate default statistics dates based on the given list of dates.
+    """Calculate default statistics dates based on the given list of dates.
 
     Args:
         dates (list): List of datetime objects representing dates.
     Returns:
         tuple: A tuple containing the default start and end dates.
 
+
     """
 
     def to_datetime(d):
@@ -17,10 +17,7 @@ from ..check import check_stats
 
 
 class Summary(dict):
-    """This class is used to store the summary statistics of a dataset.
-    It can be saved and loaded from a json file.
-    And does some basic checks on the data.
-    """
+    """This class is used to store the summary statistics of a dataset. It can be saved and loaded from a json file. And does some basic checks on the data."""
 
     STATS_NAMES = [
         "minimum",
@@ -17,11 +17,12 @@ LOG = logging.getLogger(__name__)
 class ViewCacheArray:
     """A class that provides a caching mechanism for writing to a NumPy-like array.
 
-    The is initialized with a NumPy-like array, a shape and a list to reindex the first dimension.
-    The array is used to store the final data, while the cache is used to temporarily
-    store the data before flushing it to the array.
+    The is initialized with a NumPy-like array, a shape and a list to reindex the first
+    dimension. The array is used to store the final data, while the cache is used to
+    temporarily store the data before flushing it to the array.
 
     The `flush` method copies the contents of the cache to the final array.
+
     """
 
     def __init__(self, array, *, shape, indexes):
@@ -119,9 +119,7 @@ def _as_tuples(index):
 
 
 def expand_list_indexing(method):
-    """Allows to use slices, lists, and tuples to select data from the dataset.
-    Zarr does not support indexing with lists/arrays directly, so we need to implement it ourselves.
-    """
+    """Allows to use slices, lists, and tuples to select data from the dataset. Zarr does not support indexing with lists/arrays directly, so we need to implement it ourselves."""
 
     @wraps(method)
     def wrapper(self, index):
@@ -88,13 +88,12 @@ def _frequency_to_hours(frequency):
 
 
 def _as_date(d, dates, last):
-    if isinstance(d, np.datetime64):
-        d = d.astype(datetime.datetime)
 
-    if isinstance(d, datetime.datetime):
-        if not d.minute == 0 and d.hour == 0 and d.second == 0:
-            return np.datetime64(d)
-        d = datetime.date(d.year, d.month, d.day)
+    # WARNING, datetime.datetime is a subclass of datetime.date
+    # so we need to check for datetime.datetime first
+
+    if isinstance(d, (np.datetime64, datetime.datetime)):
+        return d
 
     if isinstance(d, datetime.date):
         d = d.year * 10_000 + d.month * 100 + d.day
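
The WARNING comment deserves a two-line demonstration, since the subclass relationship is what dictates the order of the isinstance checks:

    import datetime

    d = datetime.datetime(2024, 1, 1, 12, 0)
    assert isinstance(d, datetime.date)  # True: datetime is a subclass of date
    # A `datetime.date` check placed first would therefore also capture
    # datetimes, which is why _as_date now tests for datetime.datetime
    # (and np.datetime64) before falling through to the date branch.
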
@@ -39,9 +39,7 @@ class ReadOnlyStore(zarr.storage.BaseStore):
 
 
 class HTTPStore(ReadOnlyStore):
-    """We write our own HTTPStore because the one used by zarr (fsspec) does not play
-    well with fork() and multiprocessing.
-    """
+    """We write our own HTTPStore because the one used by zarr (fsspec) does not play well with fork() and multiprocessing."""
 
     def __init__(self, url):
         self.url = url
@@ -59,9 +57,7 @@ class HTTPStore(ReadOnlyStore):
 
 
 class S3Store(ReadOnlyStore):
-    """We write our own S3Store because the one used by zarr (fsspec) does not play well
-    with fork() and multiprocessing.
-    """
+    """We write our own S3Store because the one used by zarr (fsspec) does not play well with fork() and multiprocessing."""
 
     def __init__(self, url):
         import boto3
@@ -29,12 +29,7 @@ class check:
 
         @wraps(method)
        def wrapper(obj):
-            """
-            This is a decorator that checks the compatibility of the datasets
-            before calling the method. If the datasets are compatible, it
-            will return the result of the method, otherwise it will raise an
-            exception.
-            """
+            """This is a decorator that checks the compatibility of the datasets before calling the method. If the datasets are compatible, it will return the result of the method, otherwise it will raise an exception."""
 
            for d in obj.datasets[1:]:
                getattr(obj, check)(obj.datasets[0], d)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: anemoi-datasets
-Version: 0.2.1
+Version: 0.3.1
 Summary: A package to hold various functions to support training of ML models on ECMWF data.
 Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
 License: Apache License
@@ -205,59 +205,68 @@ License: Apache License
 See the License for the specific language governing permissions and
 limitations under the License.
 
-Project-URL: Homepage, https://github.com/ecmwf/anemoi-datasets/
 Project-URL: Documentation, https://anemoi-datasets.readthedocs.io/
-Project-URL: Repository, https://github.com/ecmwf/anemoi-datasets/
+Project-URL: Homepage, https://github.com/ecmwf/anemoi-datasets/
 Project-URL: Issues, https://github.com/ecmwf/anemoi-datasets/issues
-Keywords: tools,datasets,ai
+Project-URL: Repository, https://github.com/ecmwf/anemoi-datasets/
+Keywords: ai,datasets,tools
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
-Classifier: Operating System :: OS Independent
 Requires-Python: >=3.9
 License-File: LICENSE
-Requires-Dist: anemoi-utils[provenance] >=0.1.7
-Requires-Dist: zarr <=2.17.0
-Requires-Dist: pyyaml
+Requires-Dist: anemoi-utils[provenance] >=0.3
 Requires-Dist: numpy
-Requires-Dist: tqdm
+Requires-Dist: pyyaml
 Requires-Dist: semantic-version
+Requires-Dist: tqdm
+Requires-Dist: zarr <=2.17
 Provides-Extra: all
+Requires-Dist: anemoi-utils[provenance] >=0.3 ; extra == 'all'
 Requires-Dist: boto3 ; extra == 'all'
-Requires-Dist: requests ; extra == 'all'
-Requires-Dist: s3fs ; extra == 'all'
 Requires-Dist: climetlab >=0.22.1 ; extra == 'all'
 Requires-Dist: earthkit-meteo ; extra == 'all'
-Requires-Dist: pyproj ; extra == 'all'
 Requires-Dist: ecmwflibs >=0.6.3 ; extra == 'all'
+Requires-Dist: numpy ; extra == 'all'
+Requires-Dist: pyproj ; extra == 'all'
+Requires-Dist: pyyaml ; extra == 'all'
+Requires-Dist: requests ; extra == 'all'
+Requires-Dist: s3fs ; extra == 'all'
+Requires-Dist: semantic-version ; extra == 'all'
+Requires-Dist: tqdm ; extra == 'all'
+Requires-Dist: zarr <=2.17 ; extra == 'all'
 Provides-Extra: create
 Requires-Dist: climetlab >=0.22.1 ; extra == 'create'
 Requires-Dist: earthkit-meteo ; extra == 'create'
-Requires-Dist: pyproj ; extra == 'create'
 Requires-Dist: ecmwflibs >=0.6.3 ; extra == 'create'
+Requires-Dist: pyproj ; extra == 'create'
 Provides-Extra: dev
 Requires-Dist: boto3 ; extra == 'dev'
-Requires-Dist: requests ; extra == 'dev'
-Requires-Dist: s3fs ; extra == 'dev'
 Requires-Dist: climetlab >=0.22.1 ; extra == 'dev'
 Requires-Dist: earthkit-meteo ; extra == 'dev'
-Requires-Dist: pyproj ; extra == 'dev'
 Requires-Dist: ecmwflibs >=0.6.3 ; extra == 'dev'
-Requires-Dist: sphinx ; extra == 'dev'
-Requires-Dist: sphinx-rtd-theme ; extra == 'dev'
 Requires-Dist: nbsphinx ; extra == 'dev'
 Requires-Dist: pandoc ; extra == 'dev'
+Requires-Dist: pyproj ; extra == 'dev'
+Requires-Dist: requests ; extra == 'dev'
+Requires-Dist: s3fs ; extra == 'dev'
+Requires-Dist: sphinx ; extra == 'dev'
+Requires-Dist: sphinx-argparse ; extra == 'dev'
+Requires-Dist: sphinx-rtd-theme ; extra == 'dev'
 Provides-Extra: docs
-Requires-Dist: sphinx ; extra == 'docs'
-Requires-Dist: sphinx-rtd-theme ; extra == 'docs'
 Requires-Dist: nbsphinx ; extra == 'docs'
 Requires-Dist: pandoc ; extra == 'docs'
+Requires-Dist: sphinx ; extra == 'docs'
+Requires-Dist: sphinx-argparse ; extra == 'docs'
+Requires-Dist: sphinx-rtd-theme ; extra == 'docs'
 Provides-Extra: remote
 Requires-Dist: boto3 ; extra == 'remote'
 Requires-Dist: requests ; extra == 'remote'
@@ -1,28 +1,28 @@
 anemoi/datasets/__init__.py,sha256=DC7ttKT--pmhBQALX_Cn7P28dngsJucKi5y-Ydm28QM,700
-anemoi/datasets/__main__.py,sha256=CGl8WF7rWMx9EoArysla0-ThjUFtEZUEGM58LbdU488,1798
-anemoi/datasets/_version.py,sha256=MxUhzLJIZQfEpDTTcKSxciTGrMLd5v2VmMlHa2HGeo0,411
+anemoi/datasets/__main__.py,sha256=cLA2PidDTOUHaDGzd0_E5iioKYNe-PSTv567Y2fuwQk,723
+anemoi/datasets/_version.py,sha256=HzPz9rq3s1AiZXregKlqKaJJ2wGMtvH_a3V9la9CnpM,411
 anemoi/datasets/grids.py,sha256=3YBMMJodgYhavarXPAlMZHaMtDT9v2IbTmAXZTqf8Qo,8481
-anemoi/datasets/commands/__init__.py,sha256=Pc5bhVgW92ox1lMR5WUOLuhiY2HT6PsadSHclyw99Vc,1983
+anemoi/datasets/commands/__init__.py,sha256=qAybFZPBBQs0dyx7dZ3X5JsLpE90pwrqt1vSV7cqEIw,706
 anemoi/datasets/commands/compare.py,sha256=tN3eqihvnZ0rFc0OUzrfI34PHDlYfc2l90ZIQBE1TDQ,1300
-anemoi/datasets/commands/copy.py,sha256=GZ5TmJKDOAKka9zc0YUtvmqynRqBTeb3hI_v3jLtUDM,7995
-anemoi/datasets/commands/create.py,sha256=UVieF0g1cEgNP_myklUZOSH_MuxwfYzKay5s8WDRzro,562
+anemoi/datasets/commands/copy.py,sha256=fba-zjD0iTHHXHhPEcm8VhDzsXQXDUxlbtTA1TovyT0,9991
+anemoi/datasets/commands/create.py,sha256=POdOsVDlvRrHFFkI3SNXNgNIbSxkVUUPMoo660x7Ma0,987
 anemoi/datasets/commands/scan.py,sha256=HxsLdCgBMSdEXjlJfPq5M_9LxXHHQIoZ1ZEHO_AoPgA,2881
-anemoi/datasets/commands/inspect/__init__.py,sha256=SqiWlIJSov7-RnZmIQBzsE4Br7hgl9CqshpXaQqpios,1701
+anemoi/datasets/commands/inspect/__init__.py,sha256=v6fPUTdMRdmUiEUUs0F74QlzPr-x5XEEOql3mkFme7E,1500
 anemoi/datasets/commands/inspect/zarr.py,sha256=Q1waDTgdJZwJXNST4jkO4DCIbqbf2T_2Us2k6yKGToo,19684
 anemoi/datasets/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 anemoi/datasets/compute/recentre.py,sha256=j8LdC8kq1t4PW7WFTXf93hSxok10un8ENIPwCehzbP8,4768
-anemoi/datasets/create/__init__.py,sha256=o7pZTL71XqoD3a10VrSnwroAFvN4g_9o98jEoMArjfk,5731
+anemoi/datasets/create/__init__.py,sha256=jji65Zni5aPTvS269fAMix4pN9ukmSoK0z5SVsbpr5E,5807
 anemoi/datasets/create/check.py,sha256=DLjw-eyaCNxPhoKFsP4Yn_l3SIr57YHdyPR-tE5vx80,5791
 anemoi/datasets/create/chunks.py,sha256=YEDcr0K2KiiceSTiBuZzj0TbRbzZ9J546XO7rrrTFQw,2441
 anemoi/datasets/create/config.py,sha256=uLIp1WHg3hbqwwMV9EepMwJQsXJAGImkbo0okBeEVd4,7683
 anemoi/datasets/create/input.py,sha256=UqEIqbsld0whUJUPPVKMfF_LoeKTaTyxP5kBE6zjhsE,27888
-anemoi/datasets/create/loaders.py,sha256=5KzbkZMV5c64avDwanGznj54gMIbLvwb0dXWvgUuD0Q,28611
+anemoi/datasets/create/loaders.py,sha256=BDeb2CI_oWqIGcBnt39nRGIt3r3dK4rIidNO3pBunTk,29865
 anemoi/datasets/create/patch.py,sha256=xjCLhvIQKRqmypsKInRU1CvFh1uoaB3YGSQP1UVZZik,3682
-anemoi/datasets/create/persistent.py,sha256=vQuKuEggLGhNO8A7lsUHXzdVOhqAzZh50xsb-eSF6qQ,4307
+anemoi/datasets/create/persistent.py,sha256=nT8gvhVPdI1H3zW_F7uViGKIlQQ94jCDrMSWTmhQ2_A,4290
 anemoi/datasets/create/size.py,sha256=A1w6RkaL0L9IlwIdmYsCTJTecmY_QtvbkGf__jvQle0,1068
 anemoi/datasets/create/template.py,sha256=2roItOYJzjGB0bKS28f6EjfpomP0ppT4v6T9fYzjRxQ,4263
 anemoi/datasets/create/utils.py,sha256=H1-auNSZUSDW0Aog8CHnIfZlzgKE1XPoi1I40CqquA4,3676
-anemoi/datasets/create/writer.py,sha256=BHzPDhET2BnPt-359CZ_yaaR2otIz2iENbsyQIaktxU,1378
+anemoi/datasets/create/writer.py,sha256=G1qAPvdn8anGnpWYhvSSP4u3Km_tHKPdMXm0G4skKSk,1379
 anemoi/datasets/create/zarr.py,sha256=hwM_PaYTa_IgFY1VC7qdYTWQ5MXCWWlMrzXsV_eAY0Q,4776
 anemoi/datasets/create/functions/__init__.py,sha256=K-Wi11mZI5Y6od0y6I_apDutoeay7wNrtB1P3-PizgI,513
 anemoi/datasets/create/functions/filters/__init__.py,sha256=Xe9G54CKvCI3ji-7k0R5l0WZZdhlydRgawsXuBcX_hg,379
@@ -44,8 +44,8 @@ anemoi/datasets/create/functions/sources/opendap.py,sha256=T0CPinscfafrVLaye5ue-
 anemoi/datasets/create/functions/sources/recentre.py,sha256=t07LIXG3Hp9gmPkPriILVt86TxubsHyS1EL1lzwgtXY,1810
 anemoi/datasets/create/functions/sources/source.py,sha256=hPQnV_6UIxFw97uRKcTA8TplcgG1kC8NlFHoEaaLet4,1418
 anemoi/datasets/create/functions/sources/tendencies.py,sha256=kwS_GZt8R9kpfs5RrvxPb0Gj-5nDP0sgJgfSRCAwwww,4057
-anemoi/datasets/create/statistics/__init__.py,sha256=b5LXV1J3uKpmTkNHt8hLWgUo-C5WHA0ltxVJa7b0aLc,15449
-anemoi/datasets/create/statistics/summary.py,sha256=NHzKwsMOlJENBGs6GlbmcIq4mAwsfvR9q6mdfXXgCXk,3383
+anemoi/datasets/create/statistics/__init__.py,sha256=X50drgE-ltuNe7bSIyvyeC4GeTqGTQGbglh2-2aVWKE,15445
+anemoi/datasets/create/statistics/summary.py,sha256=sgmhA24y3VRyjmDUgTnPIqcHSlWBbFA0qynx6gJ9Xw8,3370
 anemoi/datasets/data/__init__.py,sha256=tacn6K_VZ-pYhLmGePG5sze8kmqGpqscYb-bMyQnWtk,888
 anemoi/datasets/data/concat.py,sha256=U6IZi6NkI6yccrDamgasENBqwyJ1m0ZesuDtHXoqEh8,3551
 anemoi/datasets/data/dataset.py,sha256=UDnidq2amyCT2COH05pGfDCJcmkdMj1ubtHk9cl-qcE,7384
@@ -54,21 +54,21 @@ anemoi/datasets/data/debug.py,sha256=PcyrjgxaLzeb_vf12pvUtPPVvBRHNm1SimythZvqsP4
 anemoi/datasets/data/ensemble.py,sha256=PcrdNL4DhAuWYSXgNxC6igDXpDndXC_QrbLrL4Lvj-Y,1138
 anemoi/datasets/data/forewards.py,sha256=4IsaNDhYlLiCbawUvTynm2vdpGPqdXcrSoAENwsJoqI,7456
 anemoi/datasets/data/grids.py,sha256=vgZMIQbv5SnIcnPu2ujsrAQ8VyBz5o2a1SnxsjXkDuw,7495
-anemoi/datasets/data/indexing.py,sha256=ymuFO2yH12ztYnP_gmHpuBuLmKAxv2t8Pz5m1gGmBzk,4808
+anemoi/datasets/data/indexing.py,sha256=625m__JG5m_tDMrkz1hB6Vydenwt0oHuyAlc-o3Zwos,4799
 anemoi/datasets/data/join.py,sha256=m_lpxWPy8-xYOjPbVoBV3V92VGtBFIriiDWvQM6KqXc,4893
 anemoi/datasets/data/masked.py,sha256=KZZ-3nq9saj_W8PTN9V4YdZ24BayHgECj12i4yjyKpc,3525
-anemoi/datasets/data/misc.py,sha256=a-YIrCaSkOuEKHT_Q1UYADkb2wYycekRrFwZCgyW8-s,10428
+anemoi/datasets/data/misc.py,sha256=m_28VIhX546RIoVfGpimPOThl5EwOhkun2UgWMAUxqw,10355
 anemoi/datasets/data/select.py,sha256=JoEepq8iRSSX6L75hzhLrBFhy0RJInuBM3C_Eu2Ryv0,3608
 anemoi/datasets/data/statistics.py,sha256=rWuG5qlfQoo9shOXR6TleJbJONwYggxxLy_HRet8azM,1582
-anemoi/datasets/data/stores.py,sha256=gJVyg4ydIsVXWwnww-UV3uaWNXLkcz_dx2r9AREPZrE,10869
+anemoi/datasets/data/stores.py,sha256=damJzNScaGenARAv8xpNa7d32f03MpGk5adRoRi34yw,10851
 anemoi/datasets/data/subset.py,sha256=RjfOMu7p69DZXRxQpvTfDOjVAURhgUO2pWyuZpXlJGY,3671
-anemoi/datasets/data/unchecked.py,sha256=LSBLSQXzkLhoprkI2PY6OEoeX0lVT-nIe-ZyibH2jv0,4100
+anemoi/datasets/data/unchecked.py,sha256=qeUKthbvVVSPH-P366q1DEofvPzZSSXCXA49x-RkBOc,4038
 anemoi/datasets/dates/__init__.py,sha256=zOph2N_mXYbjSvqEWYF1mmm-UZpljb61WLrdFJmi0qQ,4469
 anemoi/datasets/dates/groups.py,sha256=iq310Pi7ullglOhcNblv14MmcT8FPgYCD5s45qAfV_s,3383
 anemoi/datasets/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-anemoi_datasets-0.2.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-anemoi_datasets-0.2.1.dist-info/METADATA,sha256=2RnNBqotAC66veovXZnAhzlcbN5V9qIHeQ7DQAFgIMs,15628
-anemoi_datasets-0.2.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-anemoi_datasets-0.2.1.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
-anemoi_datasets-0.2.1.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
-anemoi_datasets-0.2.1.dist-info/RECORD,,
+anemoi_datasets-0.3.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+anemoi_datasets-0.3.1.dist-info/METADATA,sha256=cLrTNyT23kitgUq05PNMK5Ni-pI2AMwdzy4dOh7jZjo,16050
+anemoi_datasets-0.3.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+anemoi_datasets-0.3.1.dist-info/entry_points.txt,sha256=yR-o-4uiPEA_GLBL81SkMYnUoxq3CAV3hHulQiRtGG0,66
+anemoi_datasets-0.3.1.dist-info/top_level.txt,sha256=DYn8VPs-fNwr7fNH9XIBqeXIwiYYd2E2k5-dUFFqUz0,7
+anemoi_datasets-0.3.1.dist-info/RECORD,,