anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between the two package versions as released.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
- {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
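The dominant structural change in this release is visible in the listing: filter and source modules moved up one level, from anemoi/datasets/create/functions/filters/ and anemoi/datasets/create/functions/sources/ to anemoi/datasets/create/filters/ and anemoi/datasets/create/sources/ (the xarray helpers were regrouped under sources/xarray_support/), and the old create/functions package was deleted. A minimal compatibility sketch for code that imports these modules directly follows; the module name rotate_winds is taken from the listing above, but importing it this way is an assumption, not a documented API:

```python
# Hedged sketch: tolerate both the 0.5.16 and the 0.5.17 module layouts.
# The module path comes from the rename entries in the listing above;
# whether these modules are meant to be imported directly is an assumption.
try:
    # 0.5.17: filters live directly under anemoi.datasets.create.filters
    from anemoi.datasets.create.filters import rotate_winds
except ImportError:
    # 0.5.16 and earlier: filters lived under create.functions.filters
    from anemoi.datasets.create.functions.filters import rotate_winds
```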
```diff
--- a/anemoi/datasets/create/__init__.py
+++ b/anemoi/datasets/create/__init__.py
@@ -15,10 +15,14 @@ import time
 import uuid
 import warnings
 from functools import cached_property
+from typing import Any
+from typing import Optional
+from typing import Union

 import cftime
 import numpy as np
 import tqdm
+import zarr
 from anemoi.utils.dates import as_datetime
 from anemoi.utils.dates import frequency_to_string
 from anemoi.utils.dates import frequency_to_timedelta
@@ -55,8 +59,19 @@ LOG = logging.getLogger(__name__)
 VERSION = "0.30"


-def json_tidy(o):
+def json_tidy(o: Any) -> Any:
+    """Convert various types to JSON serializable format.

+    Parameters
+    ----------
+    o : Any
+        The object to convert.
+
+    Returns
+    -------
+    Any
+        The JSON serializable object.
+    """
     if isinstance(o, datetime.datetime):
         return o.isoformat()

@@ -85,12 +100,24 @@ def json_tidy(o):
     raise TypeError(f"{repr(o)} is not JSON serializable {type(o)}")


-def build_statistics_dates(dates, start, end):
-
-
-
-
-
+def build_statistics_dates(
+    dates: list[datetime.datetime], start: Optional[datetime.datetime], end: Optional[datetime.datetime]
+) -> tuple[str, str]:
+    """Compute the start and end dates for the statistics.
+
+    Parameters
+    ----------
+    dates : list of datetime.datetime
+        The list of dates.
+    start : Optional[datetime.datetime]
+        The start date.
+    end : Optional[datetime.datetime]
+        The end date.
+
+    Returns
+    -------
+    tuple of str
+        The start and end dates in ISO format.
     """
     # if not specified, use the default statistics dates
     default_start, default_end = default_statistics_dates(dates)
@@ -109,7 +136,19 @@ def build_statistics_dates(dates, start, end):
     return (start.isoformat(), end.isoformat())


-def _path_readable(path):
+def _path_readable(path: str) -> bool:
+    """Check if the path is readable.
+
+    Parameters
+    ----------
+    path : str
+        The path to check.
+
+    Returns
+    -------
+    bool
+        True if the path is readable, False otherwise.
+    """
     import zarr

     try:
@@ -120,14 +159,37 @@ def _path_readable(path):


 class Dataset:
-    def __init__(self, path):
+    """A class to represent a dataset."""
+
+    def __init__(self, path: str):
+        """Initialize a Dataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         self.path = path

         _, ext = os.path.splitext(self.path)
         if ext != ".zarr":
             raise ValueError(f"Unsupported extension={ext} for path={self.path}")

-    def add_dataset(self, mode="r+", **kwargs):
+    def add_dataset(self, mode: str = "r+", **kwargs: Any) -> zarr.Array:
+        """Add a dataset to the Zarr store.
+
+        Parameters
+        ----------
+        mode : str, optional
+            The mode to open the Zarr store.
+        **kwargs
+            Additional arguments for the dataset.
+
+        Returns
+        -------
+        zarr.Array
+            The added dataset.
+        """
         import zarr

         z = zarr.open(self.path, mode=mode)
@@ -135,7 +197,14 @@ class Dataset:

         return add_zarr_dataset(zarr_root=z, **kwargs)

-    def update_metadata(self, **kwargs):
+    def update_metadata(self, **kwargs: Any) -> None:
+        """Update the metadata of the dataset.
+
+        Parameters
+        ----------
+        **kwargs
+            The metadata to update.
+        """
         import zarr

         LOG.debug(f"Updating metadata {kwargs}")
@@ -148,16 +217,19 @@ class Dataset:
             z.attrs[k] = json.loads(json.dumps(v, default=json_tidy))

     @cached_property
-    def anemoi_dataset(self):
+    def anemoi_dataset(self) -> Any:
+        """Get the Anemoi dataset."""
         return open_dataset(self.path)

     @cached_property
-    def zarr_metadata(self):
+    def zarr_metadata(self) -> dict:
+        """Get the Zarr metadata."""
         import zarr

         return dict(zarr.open(self.path, mode="r").attrs)

-    def print_info(self):
+    def print_info(self) -> None:
+        """Print information about the dataset."""
         import zarr

         z = zarr.open(self.path, mode="r")
@@ -166,13 +238,42 @@ class Dataset:
         except Exception as e:
             LOG.info(e)

-    def get_zarr_chunks(self):
+    def get_zarr_chunks(self) -> tuple:
+        """Get the chunks of the Zarr dataset.
+
+        Returns
+        -------
+        tuple
+            The chunks of the Zarr dataset.
+        """
         import zarr

         z = zarr.open(self.path, mode="r")
         return z["data"].chunks

-    def check_name(self, resolution, dates, frequency, raise_exception=True, is_test=False):
+    def check_name(
+        self,
+        resolution: str,
+        dates: list[datetime.datetime],
+        frequency: datetime.timedelta,
+        raise_exception: bool = True,
+        is_test: bool = False,
+    ) -> None:
+        """Check the name of the dataset.
+
+        Parameters
+        ----------
+        resolution : str
+            The resolution of the dataset.
+        dates : list of datetime.datetime
+            The dates of the dataset.
+        frequency : datetime.timedelta
+            The frequency of the dataset.
+        raise_exception : bool, optional
+            Whether to raise an exception if the name is invalid.
+        is_test : bool, optional
+            Whether this is a test.
+        """
         basename, _ = os.path.splitext(os.path.basename(self.path))
         try:
             DatasetName(basename, resolution, dates[0], dates[-1], frequency).raise_if_not_valid()
@@ -182,8 +283,14 @@ class Dataset:
         else:
             LOG.warning(f"Dataset name error: {e}")

-    def get_main_config(self):
-        """
+    def get_main_config(self) -> Any:
+        """Get the main configuration of the dataset.
+
+        Returns
+        -------
+        Any
+            The main configuration.
+        """
         import zarr

         z = zarr.open(self.path, mode="r")
@@ -191,7 +298,16 @@ class Dataset:


 class WritableDataset(Dataset):
-    def __init__(self, path):
+    """A class to represent a writable dataset."""
+
+    def __init__(self, path: str):
+        """Initialize a WritableDataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)
         self.path = path

@@ -200,14 +316,26 @@ class WritableDataset(Dataset):
         self.z = zarr.open(self.path, mode="r+")

     @cached_property
-    def data_array(self):
+    def data_array(self) -> Any:
+        """Get the data array of the dataset."""
         import zarr

         return zarr.open(self.path, mode="r+")["data"]


 class NewDataset(Dataset):
-    def __init__(self, path, overwrite=False):
+    """A class to represent a new dataset."""
+
+    def __init__(self, path: str, overwrite: bool = False):
+        """Initialize a NewDataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        overwrite : bool, optional
+            Whether to overwrite the existing dataset.
+        """
         super().__init__(path)
         self.path = path

@@ -218,9 +346,20 @@ class NewDataset(Dataset):


 class Actor:  # TODO: rename to Creator
+    """A base class for dataset creation actors."""
+
     dataset_class = WritableDataset

-    def __init__(self, path, cache=None):
+    def __init__(self, path: str, cache: Optional[str] = None):
+        """Initialize an Actor instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         # Catch all floating point errors, including overflow, sqrt(<0), etc
         np.seterr(all="raise", under="warn")

@@ -228,23 +367,52 @@ class Actor:  # TODO: rename to Creator
         self.cache = cache
         self.dataset = self.dataset_class(self.path)

-    def run(self):
+    def run(self) -> None:
+        """Run the actor."""
         # to be implemented in the sub-classes
         raise NotImplementedError()

-    def update_metadata(self, **kwargs):
+    def update_metadata(self, **kwargs: Any) -> None:
+        """Update the metadata of the dataset.
+
+        Parameters
+        ----------
+        **kwargs
+            The metadata to update.
+        """
         self.dataset.update_metadata(**kwargs)

-    def _cache_context(self):
+    def _cache_context(self) -> Any:
+        """Get the cache context.
+
+        Returns
+        -------
+        Any
+            The cache context.
+        """
         from .utils import cache_context

         return cache_context(self.cache)

-    def check_unkown_kwargs(self, kwargs):
+    def check_unkown_kwargs(self, kwargs: dict) -> None:
+        """Check for unknown keyword arguments.
+
+        Parameters
+        ----------
+        kwargs : dict
+            The keyword arguments.
+        """
         # remove this latter
         LOG.warning(f"💬 Unknown kwargs for {self.__class__.__name__}: {kwargs}")

-    def read_dataset_metadata(self, path):
+    def read_dataset_metadata(self, path: str) -> None:
+        """Read the metadata of the dataset.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         ds = open_dataset(path)
         self.dataset_shape = ds.shape
         self.variables_names = ds.variables
@@ -253,7 +421,19 @@ class Actor:  # TODO: rename to Creator

         self.missing_dates = sorted(list([self.dates[i] for i in ds.missing]))

-        def check_missing_dates(expected):
+        def check_missing_dates(expected: list[np.datetime64]) -> None:
+            """Check if the missing dates in the dataset match the expected dates.
+
+            Parameters
+            ----------
+            expected : list of np.datetime64
+                The expected missing dates.
+
+            Raises
+            ------
+            ValueError
+                If the missing dates in the dataset do not match the expected dates.
+            """
             import zarr

             z = zarr.open(path, "r")
@@ -269,21 +449,43 @@ class Actor:  # TODO: rename to Creator


 class Patch(Actor):
-    def __init__(self, path, options=None, **kwargs):
+    """A class to apply patches to a dataset."""
+
+    def __init__(self, path: str, options: dict = None, **kwargs: Any):
+        """Initialize a Patch instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        options : dict, optional
+            The patch options.
+        """
         self.path = path
         self.options = options or {}

-    def run(self):
+    def run(self) -> None:
+        """Run the patch."""
         from .patch import apply_patch

         apply_patch(self.path, **self.options)


 class Size(Actor):
-    def __init__(self, path, **kwargs):
+    """A class to compute the size of a dataset."""
+
+    def __init__(self, path: str, **kwargs: Any):
+        """Initialize a Size instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)

-    def run(self):
+    def run(self) -> None:
+        """Run the size computation."""
         from .size import compute_directory_sizes

         metadata = compute_directory_sizes(self.path)
@@ -301,23 +503,37 @@ class Size(Actor):


 class HasRegistryMixin:
+    """A mixin class to provide registry functionality."""
+
     @cached_property
-    def registry(self):
+    def registry(self) -> Any:
+        """Get the registry."""
         from .zarr import ZarrBuiltRegistry

         return ZarrBuiltRegistry(self.path, use_threads=self.use_threads)


 class HasStatisticTempMixin:
+    """A mixin class to provide temporary statistics functionality."""
+
     @cached_property
-    def tmp_statistics(self):
+    def tmp_statistics(self) -> TmpStatistics:
+        """Get the temporary statistics."""
         directory = self.statistics_temp_dir or os.path.join(self.path + ".storage_for_statistics.tmp")
         return TmpStatistics(directory)


 class HasElementForDataMixin:
-    def create_elements(self, config):
+    """A mixin class to provide element creation functionality for data."""
+
+    def create_elements(self, config: Any) -> None:
+        """Create elements for the dataset.

+        Parameters
+        ----------
+        config : Any
+            The configuration.
+        """
         assert self.registry
         assert self.tmp_statistics

@@ -329,11 +545,24 @@ class HasElementForDataMixin:
         self.output = build_output(config.output, parent=self)

         self.input = build_input_(main_config=config, output_config=self.output)
-        LOG.info("%s", self.input)
+        # LOG.info("%s", self.input)


-def build_input_(main_config, output_config):
+def build_input_(main_config: Any, output_config: Any) -> Any:
+    """Build the input for the dataset.

+    Parameters
+    ----------
+    main_config : Any
+        The main configuration.
+    output_config : Any
+        The output configuration.
+
+    Returns
+    -------
+    Any
+        The input builder.
+    """
     builder = build_input(
         main_config.input,
         data_sources=main_config.get("data_sources", {}),
@@ -348,21 +577,46 @@ def build_input_(main_config, output_config):


 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
+    """A class to initialize a new dataset."""
+
     dataset_class = NewDataset

     def __init__(
         self,
-        path,
-        config,
-        check_name=False,
-        overwrite=False,
-        use_threads=False,
-        statistics_temp_dir=None,
-        progress=None,
-        test=False,
-        cache=None,
-        **kwargs,
+        path: str,
+        config: dict,
+        check_name: bool = False,
+        overwrite: bool = False,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        test: bool = False,
+        cache: Optional[str] = None,
+        **kwargs: Any,
     ):
+        """Initialize an Init instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        config : dict
+            The configuration.
+        check_name : bool, optional
+            Whether to check the dataset name.
+        overwrite : bool, optional
+            Whether to overwrite the existing dataset.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        test : bool, optional
+            Whether this is a test.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         if _path_readable(path) and not overwrite:
             raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")

@@ -390,12 +644,26 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         LOG.info(f"Minimal input for 'init' step (using only the first date) : {one_date}")
         LOG.info(self.minimal_input)

-    def run(self):
+    def run(self) -> int:
+        """Run the initialization.
+
+        Returns
+        -------
+        int
+            The number of groups to process.
+        """
         with self._cache_context():
             return self._run()

-    def _run(self):
-        """
+    def _run(self) -> int:
+        """Internal method to run the initialization.
+
+        Returns
+        -------
+        int
+            The number of groups to process.
+        """
+        """Create an empty dataset of the right final shape.

         Read a small part of the data to get the shape of the data and the resolution and more metadata.
         """
@@ -547,9 +815,35 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):


 class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
+    """A class to load data into a dataset."""
+
     def __init__(
-        self, path, parts=None, use_threads=False, statistics_temp_dir=None, progress=None, cache=None, **kwargs
+        self,
+        path: str,
+        parts: Optional[str] = None,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        cache: Optional[str] = None,
+        **kwargs: Any,
     ):
+        """Initialize a Load instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        parts : Optional[str], optional
+            The parts to load.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         super().__init__(path, cache=cache)
         self.use_threads = use_threads
         self.statistics_temp_dir = statistics_temp_dir
@@ -567,11 +861,13 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         self.data_array = self.dataset.data_array
         self.n_groups = len(self.groups)

-    def run(self):
+    def run(self) -> None:
+        """Run the data loading."""
         with self._cache_context():
             self._run()

-    def _run(self):
+    def _run(self) -> None:
+        """Internal method to run the data loading."""
         for igroup, group in enumerate(self.groups):
             if not self.chunk_filter(igroup):
                 continue
@@ -595,7 +891,14 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):

         self.dataset.print_info()

-    def load_result(self, result):
+    def load_result(self, result: Any) -> None:
+        """Load the result into the dataset.
+
+        Parameters
+        ----------
+        result : Any
+            The result to load.
+        """
         # There is one cube to load for each result.
         dates = list(result.group_of_dates)

@@ -656,14 +959,30 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         array.flush()
         LOG.info("Flushed data array")

-    def _get_allow_nans(self):
+    def _get_allow_nans(self) -> Union[bool, list]:
+        """Get the allow_nans configuration.
+
+        Returns
+        -------
+        bool | list
+            The allow_nans configuration.
+        """
         config = self.main_config
         if "allow_nans" in config.build:
             return config.build.allow_nans

         return config.statistics.get("allow_nans", [])

-    def load_cube(self, cube, array):
+    def load_cube(self, cube: Any, array: ViewCacheArray) -> None:
+        """Load the cube into the array.
+
+        Parameters
+        ----------
+        cube : Any
+            The cube to load.
+        array : ViewCacheArray
+            The array to load into.
+        """
         # There are several cubelets for each cube
         start = time.time()
         load = 0
@@ -673,7 +992,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         total = cube.count(reading_chunks)
         LOG.debug(f"Loading datacube: {cube}")

-        def position(x):
+        def position(x: Any) -> Optional[int]:
             if isinstance(x, str) and "/" in x:
                 x = x.split("/")
                 return int(x[0])
@@ -715,7 +1034,29 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):


 class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
-    def __init__(self, path, statistics_temp_dir=None, delta=[], use_threads=False, **kwargs):
+    """A class to clean up temporary data and registry entries."""
+
+    def __init__(
+        self,
+        path: str,
+        statistics_temp_dir: Optional[str] = None,
+        delta: list = [],
+        use_threads: bool = False,
+        **kwargs: Any,
+    ):
+        """Initialize a Cleanup instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        delta : list, optional
+            The delta values.
+        use_threads : bool, optional
+            Whether to use threads.
+        """
         super().__init__(path)
         self.use_threads = use_threads
         self.statistics_temp_dir = statistics_temp_dir
@@ -725,7 +1066,8 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
             for d in delta
         ]

-    def run(self):
+    def run(self) -> None:
+        """Run the cleanup."""
         self.tmp_statistics.delete()
         self.registry.clean()
         for actor in self.actors:
@@ -733,16 +1075,35 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):


 class Verify(Actor):
-    def __init__(self, path, **kwargs):
+    """A class to verify the integrity of a dataset."""
+
+    def __init__(self, path: str, **kwargs: Any):
+        """Initialize a Verify instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)

-    def run(self):
+    def run(self) -> None:
+        """Run the verification."""
         LOG.info(f"Verifying dataset at {self.path}")
         LOG.info(str(self.dataset.anemoi_dataset))


 class AdditionsMixin:
-    def skip(self):
+    """A mixin class to handle dataset additions."""
+
+    def skip(self) -> bool:
+        """Check if the additions should be skipped.
+
+        Returns
+        -------
+        bool
+            Whether to skip the additions.
+        """
         frequency = frequency_to_timedelta(self.dataset.anemoi_dataset.frequency)
         if not self.delta.total_seconds() % frequency.total_seconds() == 0:
             LOG.debug(f"Delta {self.delta} is not a multiple of frequency {frequency}. Skipping.")
@@ -755,13 +1116,15 @@ class AdditionsMixin:
         return False

     @cached_property
-    def tmp_storage_path(self):
+    def tmp_storage_path(self) -> str:
+        """Get the path to the temporary storage."""
         name = "storage_for_additions"
         if self.delta:
             name += frequency_to_string(self.delta)
         return os.path.join(f"{self.path}.{name}.tmp")

-    def read_from_dataset(self):
+    def read_from_dataset(self) -> None:
+        """Read data from the dataset."""
         self.variables = self.dataset.anemoi_dataset.variables
         self.frequency = frequency_to_timedelta(self.dataset.anemoi_dataset.frequency)
         start = self.dataset.zarr_metadata["statistics_start_date"]
@@ -780,11 +1143,34 @@ class AdditionsMixin:


 class DeltaDataset:
-    def __init__(self, ds, idelta):
+    """A class to represent a dataset with delta values."""
+
+    def __init__(self, ds: Any, idelta: int):
+        """Initialize a DeltaDataset instance.
+
+        Parameters
+        ----------
+        ds : Any
+            The dataset.
+        idelta : int
+            The delta value.
+        """
         self.ds = ds
         self.idelta = idelta

-    def __getitem__(self, i):
+    def __getitem__(self, i: int) -> Any:
+        """Get an item from the dataset.
+
+        Parameters
+        ----------
+        i : int
+            The index.
+
+        Returns
+        -------
+        Any
+            The item.
+        """
         j = i - self.idelta
         if j < 0:
             raise MissingDateError(f"Missing date {j}")
@@ -792,13 +1178,29 @@ class DeltaDataset:


 class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, use_threads=False, progress=None, **kwargs):
+    """A class to initialize dataset additions."""
+
+    def __init__(self, path: str, delta: str, use_threads: bool = False, progress: Any = None, **kwargs: Any):
+        """Initialize an _InitAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
         self.progress = progress

-    def run(self):
+    def run(self) -> None:
+        """Run the additions initialization."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}")
             return
@@ -808,14 +1210,40 @@ class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage.create()
         LOG.info(f"Dataset {self.tmp_storage_path} additions initialized.")

-    def cleanup(self):
+    def cleanup(self) -> None:
+        """Clean up the temporary storage."""
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         self.tmp_storage.delete()
         LOG.info(f"Cleaned temporary storage {self.tmp_storage_path}")


 class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, parts=None, use_threads=False, progress=None, **kwargs):
+    """A class to run dataset additions."""
+
+    def __init__(
+        self,
+        path: str,
+        delta: str,
+        parts: Optional[str] = None,
+        use_threads: bool = False,
+        progress: Any = None,
+        **kwargs: Any,
+    ):
+        """Initialize a _RunAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        parts : Optional[str], optional
+            The parts to load.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
@@ -825,7 +1253,8 @@ class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         LOG.info(f"Writing in {self.tmp_storage_path}")

-    def run(self):
+    def run(self) -> None:
+        """Run the additions."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}")
             return
@@ -846,7 +1275,14 @@ class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage.flush()
         LOG.debug(f"Dataset {self.path} additions run.")

-    def allow_nans(self):
+    def allow_nans(self) -> bool:
+        """Check if NaNs are allowed.
+
+        Returns
+        -------
+        bool
+            Whether NaNs are allowed.
+        """
         if self.dataset.anemoi_dataset.metadata.get("allow_nans", False):
             return True

@@ -858,7 +1294,22 @@ class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):


 class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, use_threads=False, progress=None, **kwargs):
+    """A class to finalize dataset additions."""
+
+    def __init__(self, path: str, delta: str, use_threads: bool = False, progress: Any = None, **kwargs: Any):
+        """Initialize a _FinaliseAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
@@ -867,7 +1318,8 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         LOG.info(f"Reading from {self.tmp_storage_path}.")

-    def run(self):
+    def run(self) -> None:
+        """Run the additions finalization."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}.")
             return
@@ -969,7 +1421,14 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self._write(self.summary)
         self.tmp_storage.delete()

-    def _write(self, summary):
+    def _write(self, summary: Summary) -> None:
+        """Write the summary to the dataset.
+
+        Parameters
+        ----------
+        summary : Summary
+            The summary to write.
+        """
         for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
             name = f"statistics_tendencies_{frequency_to_string(self.delta)}_{k}"
             self.dataset.add_dataset(name=name, array=summary[k], dimensions=("variable",))
@@ -977,9 +1436,22 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         LOG.debug(f"Wrote additions in {self.path}")


-def multi_addition(cls):
+def multi_addition(cls: type) -> type:
+    """Create a class to handle multiple additions.
+
+    Parameters
+    ----------
+    cls : type
+        The class to handle additions.
+
+    Returns
+    -------
+    type
+        The class to handle multiple additions.
+    """
+
     class MultiAdditions:
-        def __init__(self, *args, **kwargs):
+        def __init__(self, *args, **kwargs: Any):
             self.actors = []

             for k in kwargs.pop("delta", []):
@@ -988,7 +1460,8 @@ def multi_addition(cls):
             if not self.actors:
                 LOG.warning("No delta found in kwargs, no additions will be computed.")

-        def run(self):
+        def run(self) -> None:
+            """Run the additions."""
             for actor in self.actors:
                 actor.run()

@@ -1001,13 +1474,36 @@ FinaliseAdditions = multi_addition(_FinaliseAdditions)


 class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
-    def __init__(self, path, use_threads=False, statistics_temp_dir=None, progress=None, **kwargs):
+    """A class to compute statistics for a dataset."""
+
+    def __init__(
+        self,
+        path: str,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        **kwargs: Any,
+    ):
+        """Initialize a Statistics instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.use_threads = use_threads
         self.progress = progress
         self.statistics_temp_dir = statistics_temp_dir

-    def run(self):
+    def run(self) -> None:
+        """Run the statistics computation."""
         start, end = (
             self.dataset.zarr_metadata["statistics_start_date"],
             self.dataset.zarr_metadata["statistics_end_date"],
@@ -1034,7 +1530,8 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         LOG.info(f"Wrote statistics in {self.path}")

     @cached_property
-    def allow_nans(self):
+    def allow_nans(self) -> Union[bool, list]:
+        """Check if NaNs are allowed."""
         import zarr

         z = zarr.open(self.path, mode="r")
@@ -1048,12 +1545,26 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         return True


-def chain(tasks):
+def chain(tasks: list) -> type:
+    """Create a class to chain multiple tasks.
+
+    Parameters
+    ----------
+    tasks : list
+        The list of tasks to chain.
+
+    Returns
+    -------
+    type
+        The class to chain multiple tasks.
+    """
+
     class Chain(Actor):
-        def __init__(self, **kwargs):
+        def __init__(self, **kwargs: Any):
             self.kwargs = kwargs

-        def run(self):
+        def run(self) -> None:
+            """Run the chained tasks."""
             for cls in tasks:
                 t = cls(**self.kwargs)
                 t.run()
@@ -1061,7 +1572,23 @@ def chain(tasks):
     return Chain


-def creator_factory(name, trace=None, **kwargs):
+def creator_factory(name: str, trace: Optional[str] = None, **kwargs: Any) -> Any:
+    """Create a dataset creator.
+
+    Parameters
+    ----------
+    name : str
+        The name of the creator.
+    trace : Optional[str], optional
+        The trace file.
+    **kwargs
+        Additional arguments for the creator.
+
+    Returns
+    -------
+    Any
+        The dataset creator.
+    """
     if trace:

         enable_trace(trace)
```