anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (155)
  1. anemoi/datasets/__init__.py +4 -1
  2. anemoi/datasets/__main__.py +12 -2
  3. anemoi/datasets/_version.py +9 -4
  4. anemoi/datasets/commands/cleanup.py +17 -2
  5. anemoi/datasets/commands/compare.py +18 -2
  6. anemoi/datasets/commands/copy.py +196 -14
  7. anemoi/datasets/commands/create.py +50 -7
  8. anemoi/datasets/commands/finalise-additions.py +17 -2
  9. anemoi/datasets/commands/finalise.py +17 -2
  10. anemoi/datasets/commands/init-additions.py +17 -2
  11. anemoi/datasets/commands/init.py +16 -2
  12. anemoi/datasets/commands/inspect.py +283 -62
  13. anemoi/datasets/commands/load-additions.py +16 -2
  14. anemoi/datasets/commands/load.py +16 -2
  15. anemoi/datasets/commands/patch.py +17 -2
  16. anemoi/datasets/commands/publish.py +17 -2
  17. anemoi/datasets/commands/scan.py +31 -3
  18. anemoi/datasets/compute/recentre.py +47 -11
  19. anemoi/datasets/create/__init__.py +612 -85
  20. anemoi/datasets/create/check.py +142 -20
  21. anemoi/datasets/create/chunks.py +64 -4
  22. anemoi/datasets/create/config.py +185 -21
  23. anemoi/datasets/create/filter.py +50 -0
  24. anemoi/datasets/create/filters/__init__.py +33 -0
  25. anemoi/datasets/create/filters/empty.py +37 -0
  26. anemoi/datasets/create/filters/legacy.py +93 -0
  27. anemoi/datasets/create/filters/noop.py +37 -0
  28. anemoi/datasets/create/filters/orog_to_z.py +58 -0
  29. anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
  30. anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
  31. anemoi/datasets/create/filters/rename.py +205 -0
  32. anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
  33. anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
  34. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
  35. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
  36. anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
  37. anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
  38. anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
  39. anemoi/datasets/create/filters/transform.py +53 -0
  40. anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
  41. anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
  42. anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
  43. anemoi/datasets/create/input/__init__.py +76 -5
  44. anemoi/datasets/create/input/action.py +149 -13
  45. anemoi/datasets/create/input/concat.py +81 -10
  46. anemoi/datasets/create/input/context.py +39 -4
  47. anemoi/datasets/create/input/data_sources.py +72 -6
  48. anemoi/datasets/create/input/empty.py +21 -3
  49. anemoi/datasets/create/input/filter.py +60 -12
  50. anemoi/datasets/create/input/function.py +154 -37
  51. anemoi/datasets/create/input/join.py +86 -14
  52. anemoi/datasets/create/input/misc.py +67 -17
  53. anemoi/datasets/create/input/pipe.py +33 -6
  54. anemoi/datasets/create/input/repeated_dates.py +189 -41
  55. anemoi/datasets/create/input/result.py +202 -87
  56. anemoi/datasets/create/input/step.py +119 -22
  57. anemoi/datasets/create/input/template.py +100 -13
  58. anemoi/datasets/create/input/trace.py +62 -7
  59. anemoi/datasets/create/patch.py +52 -4
  60. anemoi/datasets/create/persistent.py +134 -17
  61. anemoi/datasets/create/size.py +15 -1
  62. anemoi/datasets/create/source.py +51 -0
  63. anemoi/datasets/create/sources/__init__.py +36 -0
  64. anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
  65. anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
  66. anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
  67. anemoi/datasets/create/sources/empty.py +37 -0
  68. anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
  69. anemoi/datasets/create/sources/grib.py +297 -0
  70. anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
  71. anemoi/datasets/create/sources/legacy.py +93 -0
  72. anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
  73. anemoi/datasets/create/sources/netcdf.py +42 -0
  74. anemoi/datasets/create/sources/opendap.py +43 -0
  75. anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
  76. anemoi/datasets/create/sources/recentre.py +150 -0
  77. anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
  78. anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
  79. anemoi/datasets/create/sources/xarray.py +92 -0
  80. anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
  81. anemoi/datasets/create/sources/xarray_support/README.md +1 -0
  82. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
  83. anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
  84. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
  85. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
  86. anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
  87. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
  88. anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
  89. anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
  90. anemoi/datasets/create/sources/xarray_support/time.py +391 -0
  91. anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
  92. anemoi/datasets/create/sources/xarray_zarr.py +41 -0
  93. anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
  94. anemoi/datasets/create/statistics/__init__.py +233 -44
  95. anemoi/datasets/create/statistics/summary.py +52 -6
  96. anemoi/datasets/create/testing.py +76 -0
  97. anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
  98. anemoi/datasets/create/utils.py +97 -6
  99. anemoi/datasets/create/writer.py +26 -4
  100. anemoi/datasets/create/zarr.py +170 -23
  101. anemoi/datasets/data/__init__.py +51 -4
  102. anemoi/datasets/data/complement.py +191 -40
  103. anemoi/datasets/data/concat.py +141 -16
  104. anemoi/datasets/data/dataset.py +552 -61
  105. anemoi/datasets/data/debug.py +197 -26
  106. anemoi/datasets/data/ensemble.py +93 -8
  107. anemoi/datasets/data/fill_missing.py +165 -18
  108. anemoi/datasets/data/forwards.py +428 -56
  109. anemoi/datasets/data/grids.py +323 -97
  110. anemoi/datasets/data/indexing.py +112 -19
  111. anemoi/datasets/data/interpolate.py +92 -12
  112. anemoi/datasets/data/join.py +158 -19
  113. anemoi/datasets/data/masked.py +129 -15
  114. anemoi/datasets/data/merge.py +137 -23
  115. anemoi/datasets/data/misc.py +172 -16
  116. anemoi/datasets/data/missing.py +233 -29
  117. anemoi/datasets/data/rescale.py +111 -10
  118. anemoi/datasets/data/select.py +168 -26
  119. anemoi/datasets/data/statistics.py +67 -6
  120. anemoi/datasets/data/stores.py +149 -64
  121. anemoi/datasets/data/subset.py +159 -25
  122. anemoi/datasets/data/unchecked.py +168 -57
  123. anemoi/datasets/data/xy.py +168 -25
  124. anemoi/datasets/dates/__init__.py +191 -16
  125. anemoi/datasets/dates/groups.py +189 -47
  126. anemoi/datasets/grids.py +270 -31
  127. anemoi/datasets/testing.py +28 -1
  128. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
  129. anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
  130. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
  131. anemoi/datasets/create/functions/__init__.py +0 -66
  132. anemoi/datasets/create/functions/filters/__init__.py +0 -9
  133. anemoi/datasets/create/functions/filters/empty.py +0 -17
  134. anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
  135. anemoi/datasets/create/functions/filters/rename.py +0 -79
  136. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
  137. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
  138. anemoi/datasets/create/functions/sources/empty.py +0 -15
  139. anemoi/datasets/create/functions/sources/grib.py +0 -150
  140. anemoi/datasets/create/functions/sources/netcdf.py +0 -15
  141. anemoi/datasets/create/functions/sources/opendap.py +0 -15
  142. anemoi/datasets/create/functions/sources/recentre.py +0 -60
  143. anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
  144. anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
  145. anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
  146. anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
  147. anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
  148. anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
  149. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
  150. anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
  151. anemoi/datasets/utils/fields.py +0 -47
  152. anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
  153. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
  154. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
  155. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
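
Most of the files listed above are moves rather than new code: the anemoi/datasets/create/functions/filters and anemoi/datasets/create/functions/sources packages are relocated to anemoi/datasets/create/filters and anemoi/datasets/create/sources, with the xarray helpers gathered under sources/xarray_support. Below is a hedged sketch of what that move can mean for code that imports these modules directly; only the module paths come from the file list, and the version-fallback pattern is an illustrative assumption, not something this release provides.

# Hedged sketch: adapting an import to the module moves listed above.
# The module paths come from the file list; the try/except fallback is
# an illustrative assumption, not part of the package itself.
try:
    # anemoi-datasets 0.5.17: filters and sources live directly under anemoi.datasets.create
    from anemoi.datasets.create.filters import rotate_winds
    from anemoi.datasets.create.sources import mars
except ImportError:
    # anemoi-datasets 0.5.16 and earlier: the old "functions" package layout
    from anemoi.datasets.create.functions.filters import rotate_winds
    from anemoi.datasets.create.functions.sources import mars
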
anemoi/datasets/data/dataset.py

@@ -13,17 +13,59 @@ import json
 import logging
 import pprint
 import warnings
+from abc import ABC
+from abc import abstractmethod
 from functools import cached_property
 
+try:
+    from types import EllipsisType
+except ImportError:
+    # Python 3.9
+    EllipsisType = type(Ellipsis)
+from typing import TYPE_CHECKING
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Set
+from typing import Sized
+from typing import Tuple
+from typing import Union
+
 import numpy as np
 from anemoi.utils.dates import frequency_to_seconds
 from anemoi.utils.dates import frequency_to_string
 from anemoi.utils.dates import frequency_to_timedelta
+from numpy.typing import NDArray
+
+from .debug import Node
+from .debug import Source
+
+if TYPE_CHECKING:
+    import matplotlib
 
 LOG = logging.getLogger(__name__)
 
 
-def _tidy(v):
+Shape = Tuple[int, ...]
+TupleIndex = Tuple[Union[int, slice, EllipsisType], ...]
+FullIndex = Union[int, slice, TupleIndex]
+
+
+def _tidy(v: Any) -> Any:
+    """Tidy up the input value.
+
+    Parameters
+    ----------
+    v : Any
+        The input value to tidy up.
+
+    Returns
+    -------
+    Any
+        The tidied value.
+    """
     if isinstance(v, (list, tuple, set)):
         return [_tidy(i) for i in v]
     if isinstance(v, dict):
@@ -49,26 +91,53 @@ def _tidy(v):
     return v
 
 
-class Dataset:
-    arguments = {}
-    _name = None
+class Dataset(ABC, Sized):
+    arguments: Dict[str, Any] = {}
+    _name: Union[str, None] = None
 
     def mutate(self) -> "Dataset":
-        """Give an opportunity to a subclass to return a new Dataset
-        object of a different class, if needed.
-        """
+        """Give an opportunity to a subclass to return a new Dataset object of a different class, if needed.
 
+        Returns
+        -------
+        Dataset
+            The mutated dataset.
+        """
         return self
 
-    def swap_with_parent(self, parent):
+    def swap_with_parent(self, parent: "Dataset") -> "Dataset":
+        """Swap the current dataset with its parent dataset.
+
+        Parameters
+        ----------
+        parent : Dataset
+            The parent dataset.
+
+        Returns
+        -------
+        Dataset
+            The parent dataset.
+        """
         return parent
 
     @cached_property
-    def _len(self):
+    def _len(self) -> int:
+        """Cache and return the length of the dataset."""
         return len(self)
 
-    def _subset(self, **kwargs):
+    def _subset(self, **kwargs: Any) -> "Dataset":
+        """Create a subset of the dataset based on the provided keyword arguments.
+
+        Parameters
+        ----------
+        **kwargs : Any
+            Keyword arguments for creating the subset.
 
+        Returns
+        -------
+        Dataset
+            The subset of the dataset.
+        """
         if not kwargs:
             return self.mutate()
 
@@ -79,10 +148,23 @@ class Dataset:
         return result
 
     @property
-    def name(self):
+    def name(self) -> Union[str, None]:
+        """Return the name of the dataset."""
         return self._name
 
-    def __subset(self, **kwargs):
+    def __subset(self, **kwargs: Any) -> "Dataset":
+        """Internal method to create a subset of the dataset based on the provided keyword arguments.
+
+        Parameters
+        ----------
+        **kwargs : Any
+            Keyword arguments for creating the subset.
+
+        Returns
+        -------
+        Dataset
+            The subset of the dataset.
+        """
         if not kwargs:
             return self.mutate()
 
@@ -213,8 +295,19 @@ class Dataset:
 
         raise NotImplementedError("Unsupported arguments: " + ", ".join(kwargs))
 
-    def _frequency_to_indices(self, frequency):
+    def _frequency_to_indices(self, frequency: str) -> list[int]:
+        """Convert a frequency string to a list of indices.
 
+        Parameters
+        ----------
+        frequency : str
+            The frequency string.
+
+        Returns
+        -------
+        list of int
+            The list of indices.
+        """
         requested_frequency = frequency_to_seconds(frequency)
         dataset_frequency = frequency_to_seconds(self.frequency)
         assert requested_frequency % dataset_frequency == 0
@@ -223,12 +316,35 @@ class Dataset:
 
         return range(0, len(self), step)
 
-    def _shuffle_indices(self):
-        import numpy as np
+    def _shuffle_indices(self) -> NDArray[Any]:
+        """Return a shuffled array of indices.
 
+        Returns
+        -------
+        numpy.ndarray
+            The shuffled array of indices.
+        """
         return np.random.permutation(len(self))
 
-    def _dates_to_indices(self, start, end):
+    def _dates_to_indices(
+        self,
+        start: Union[None, str, datetime.datetime],
+        end: Union[None, str, datetime.datetime],
+    ) -> List[int]:
+        """Convert date range to a list of indices.
+
+        Parameters
+        ----------
+        start : None, str, or datetime.datetime
+            The start date.
+        end : None, str, or datetime.datetime
+            The end date.
+
+        Returns
+        -------
+        list of int
+            The list of indices.
+        """
         from .misc import as_first_date
         from .misc import as_last_date
 
@@ -239,7 +355,19 @@ class Dataset:
 
         return [i for i, date in enumerate(self.dates) if start <= date <= end]
 
-    def _select_to_columns(self, vars):
+    def _select_to_columns(self, vars: Union[str, List[str], Tuple[str], set]) -> List[int]:
+        """Convert variable names to a list of column indices.
+
+        Parameters
+        ----------
+        vars : str, list of str, tuple of str, or set
+            The variable names.
+
+        Returns
+        -------
+        list of int
+            The list of column indices.
+        """
         if isinstance(vars, set):
             # We keep the order of the variables as they are in the zarr file
             nvars = [v for v in self.name_to_index if v in vars]
@@ -251,7 +379,19 @@ class Dataset:
 
         return [self.name_to_index[v] for v in vars]
 
-    def _drop_to_columns(self, vars):
+    def _drop_to_columns(self, vars: Union[str, Sequence[str]]) -> List[int]:
+        """Convert variable names to a list of column indices to drop.
+
+        Parameters
+        ----------
+        vars : str, list of str, tuple of str, or set
+            The variable names.
+
+        Returns
+        -------
+        list of int
+            The list of column indices to drop.
+        """
         if not isinstance(vars, (list, tuple, set)):
             vars = [vars]
 
@@ -260,7 +400,19 @@ class Dataset:
 
         return sorted([v for k, v in self.name_to_index.items() if k not in vars])
 
-    def _reorder_to_columns(self, vars):
+    def _reorder_to_columns(self, vars: Union[str, List[str], Tuple[str], Dict[str, int]]) -> List[int]:
+        """Convert variable names to a list of reordered column indices.
+
+        Parameters
+        ----------
+        vars : str, list of str, tuple of str, or dict of str to int
+            The variable names.
+
+        Returns
+        -------
+        list of int
+            The list of reordered column indices.
+        """
         if isinstance(vars, str) and vars == "sort":
             # Sorting the variables alphabetically.
             # This is cruical for pre-training then transfer learning in combination with
@@ -280,20 +432,55 @@ class Dataset:
 
         return indices
 
-    def dates_interval_to_indices(self, start, end):
+    def dates_interval_to_indices(
+        self, start: Union[None, str, datetime.datetime], end: Union[None, str, datetime.datetime]
+    ) -> List[int]:
+        """Convert date interval to a list of indices.
+
+        Parameters
+        ----------
+        start : None, str, or datetime.datetime
+            The start date.
+        end : None, str, or datetime.datetime
+            The end date.
+
+        Returns
+        -------
+        list of int
+            The list of indices.
+        """
         return self._dates_to_indices(start, end)
 
-    def provenance(self):
+    def provenance(self) -> Dict[str, Any]:
+        """Return the provenance information of the dataset.
+
+        Returns
+        -------
+        dict
+            The provenance information.
+        """
         return {}
 
-    def sub_shape(self, drop_axis):
-        shape = self.shape
-        shape = list(shape)
+    def sub_shape(self, drop_axis: int) -> TupleIndex:
+        """Return the shape of the dataset with one axis dropped.
+
+        Parameters
+        ----------
+        drop_axis : int
+            The axis to drop.
+
+        Returns
+        -------
+        tuple
+            The shape with one axis dropped.
+        """
+        shape = list(self.shape)
         shape.pop(drop_axis)
         return tuple(shape)
 
     @property
-    def typed_variables(self):
+    def typed_variables(self) -> Dict[str, Any]:
+        """Return the variables with their types."""
         from anemoi.transform.variables import Variable
 
         constants = self.constant_fields
@@ -313,12 +500,26 @@ class Dataset:
 
         return result
 
-    def _input_sources(self):
+    def _input_sources(self) -> List[Any]:
+        """Return the input sources of the dataset.
+
+        Returns
+        -------
+        list
+            The input sources.
+        """
         sources = []
         self.collect_input_sources(sources)
         return sources
 
-    def metadata(self):
+    def metadata(self) -> Dict[str, Any]:
+        """Return the metadata of the dataset.
+
+        Returns
+        -------
+        dict
+            The metadata.
+        """
         import anemoi
 
         _, source_to_arrays = self._supporting_arrays_and_sources()
@@ -346,14 +547,23 @@ class Dataset:
             raise
 
     @property
-    def start_date(self):
+    def start_date(self) -> np.datetime64:
+        """Return the start date of the dataset."""
         return self.dates[0]
 
     @property
-    def end_date(self):
+    def end_date(self) -> np.datetime64:
+        """Return the end date of the dataset."""
         return self.dates[-1]
 
-    def dataset_metadata(self):
+    def dataset_metadata(self) -> Dict[str, Any]:
+        """Return the metadata of the dataset.
+
+        Returns
+        -------
+        dict
+            The metadata.
+        """
         return dict(
             specific=self.metadata_specific(),
             frequency=self.frequency,
@@ -366,11 +576,21 @@ class Dataset:
             name=self.name,
         )
 
-    def _supporting_arrays(self, *path):
+    def _supporting_arrays(self, *path: str) -> Dict[str, NDArray[Any]]:
+        """Return the supporting arrays of the dataset.
 
-        import numpy as np
+        Parameters
+        ----------
+        *path : str
+            The path components.
 
-        def _path(path, name):
+        Returns
+        -------
+        dict
+            The supporting arrays.
+        """
+
+        def _path(path, name: str) -> str:
             return "/".join(str(_) for _ in [*path, name])
 
         result = {
@@ -394,13 +614,25 @@ class Dataset:
 
         return result
 
-    def supporting_arrays(self):
-        """Arrays to be saved in the checkpoints"""
+    def supporting_arrays(self) -> Dict[str, NDArray[Any]]:
+        """Return the supporting arrays to be saved in the checkpoints.
+
+        Returns
+        -------
+        dict
+            The supporting arrays.
+        """
        arrays, _ = self._supporting_arrays_and_sources()
         return arrays
 
-    def _supporting_arrays_and_sources(self):
+    def _supporting_arrays_and_sources(self) -> Tuple[Dict[str, NDArray], Dict[int, List[str]]]:
+        """Return the supporting arrays and their sources.
 
+        Returns
+        -------
+        tuple
+            The supporting arrays and their sources.
+        """
         source_to_arrays = {}
 
         # Top levels arrays
@@ -420,11 +652,32 @@ class Dataset:
 
         return result, source_to_arrays
 
-    def collect_supporting_arrays(self, collected, *path):
+    def collect_supporting_arrays(self, collected: List[Tuple[Tuple[str, ...], str, NDArray[Any]]], *path: str) -> None:
+        """Collect supporting arrays.
+
+        Parameters
+        ----------
+        collected : list of tuple
+            The collected supporting arrays.
+        *path : str
+            The path components.
+        """
         # Override this method to add more arrays
         pass
 
-    def metadata_specific(self, **kwargs):
+    def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
+        """Return specific metadata of the dataset.
+
+        Parameters
+        ----------
+        **kwargs : Any
+            Additional keyword arguments.
+
+        Returns
+        -------
+        dict
+            The specific metadata.
+        """
         action = self.__class__.__name__.lower()
         # assert isinstance(self.frequency, datetime.timedelta), (self.frequency, self, action)
         return dict(
@@ -437,33 +690,53 @@ class Dataset:
             **kwargs,
         )
 
-    def __repr__(self):
+    def __repr__(self) -> str:
+        """Return the string representation of the dataset.
+
+        Returns
+        -------
+        str
+            The string representation.
+        """
         return self.__class__.__name__ + "()"
 
     @property
-    def grids(self):
+    def grids(self) -> TupleIndex:
+        """Return the grid shape of the dataset."""
         return (self.shape[-1],)
 
-    def _check(ds):
-        common = Dataset.__dict__.keys() & ds.__class__.__dict__.keys()
-        overriden = [m for m in common if Dataset.__dict__[m] is not ds.__class__.__dict__[m]]
+    def _check(self) -> None:
+        """Check for overridden private methods in the dataset."""
+        common = Dataset.__dict__.keys() & self.__class__.__dict__.keys()
+        overriden = [m for m in common if Dataset.__dict__[m] is not self.__class__.__dict__[m]]
 
         for n in overriden:
-            if n.startswith("_") and not n.startswith("__"):
-                warnings.warn(f"Private method {n} is overriden in {ds.__class__.__name__}")
+            if n.startswith("_") and not n.startswith("__") and n not in ("_abc_impl",):
+                warnings.warn(f"Private method {n} is overriden in {self.__class__.__name__}")
+
+    def _repr_html_(self) -> str:
+        """Return the HTML representation of the dataset.
 
-    def _repr_html_(self):
+        Returns
+        -------
+        str
+            The HTML representation.
+        """
         return self.tree().html()
 
     @property
-    def label(self):
+    def label(self) -> str:
+        """Return the label of the dataset."""
         return self.__class__.__name__.lower()
 
-    def get_dataset_names(self, names):
-        raise NotImplementedError(self)
+    def computed_constant_fields(self) -> List[str]:
+        """Return the computed constant fields of the dataset.
 
-    def computed_constant_fields(self):
-        # Call `constant_fields` instead of `computed_constant_fields`
+        Returns
+        -------
+        list of str
+            The computed constant fields.
+        """
         try:
             # If the tendencies are computed, we can use them
             return sorted(self._compute_constant_fields_from_statistics())
@@ -473,8 +746,14 @@ class Dataset:
 
         return sorted(self._compute_constant_fields_from_a_few_samples())
 
-    def _compute_constant_fields_from_a_few_samples(self):
+    def _compute_constant_fields_from_a_few_samples(self) -> List[str]:
+        """Compute constant fields from a few samples.
 
+        Returns
+        -------
+        list of str
+            The computed constant fields.
+        """
         import numpy as np
 
         # Otherwise, we need to compute them
@@ -508,7 +787,14 @@ class Dataset:
 
         return [v for i, v in enumerate(self.variables) if constants[i]]
 
-    def _compute_constant_fields_from_statistics(self):
+    def _compute_constant_fields_from_statistics(self) -> List[str]:
+        """Compute constant fields from statistics.
+
+        Returns
+        -------
+        list of str
+            The computed constant fields.
+        """
         result = []
 
         t = self.statistics_tendencies()
@@ -519,7 +805,13 @@ class Dataset:
 
         return result
 
-    def plot(self, date, variable, member=0, **kwargs):
+    def plot(
+        self,
+        date: Union[int, datetime.datetime, np.datetime64, str],
+        variable: Union[int, str],
+        member: int = 0,
+        **kwargs: Any,
+    ) -> "matplotlib.pyplot.Axes":
         """For debugging purposes, plot a field.
 
         Parameters
@@ -530,17 +822,42 @@ class Dataset:
             The variable to plot.
         member : int, optional
             The ensemble member to plot.
+        **kwargs : Any
+            Additional arguments to pass to matplotlib.pyplot.tricontourf.
+
+        Returns
+        -------
+        matplotlib.pyplot.Axes
+            The plot axes.
+        """
+        from anemoi.utils.devtools import plot_values
 
-        **kwargs:
-            Additional arguments to pass to matplotlib.pyplot.tricontourf
+        values = self[self.to_index(date, variable, member)]
+
+        return plot_values(values, self.latitudes, self.longitudes, **kwargs)
 
+    def to_index(
+        self,
+        date: Union[int, datetime.datetime, np.datetime64, str],
+        variable: Union[int, str],
+        member: int = 0,
+    ) -> Tuple[int, int, int]:
+        """Convert date, variable, and member to indices.
+
+        Parameters
+        ----------
+        date : int or datetime.datetime or numpy.datetime64 or str
+            The date.
+        variable : int or str
+            The variable.
+        member : int, optional
+            The ensemble member.
 
         Returns
        -------
-        matplotlib.pyplot.Axes
+        tuple of int
+            The indices.
         """
-
-        from anemoi.utils.devtools import plot_values
         from earthkit.data.utils.dates import to_datetime
 
         if not isinstance(date, int):
@@ -554,6 +871,8 @@ class Dataset:
         else:
             date_index = date
 
+        date_index = int(date_index)  # because np.int64 is not instance of int
+
         if isinstance(variable, int):
             variable_index = variable
         else:
@@ -562,6 +881,178 @@ class Dataset:
 
             variable_index = self.name_to_index[variable]
 
-        values = self[date_index, variable_index, member]
+        return (date_index, variable_index, member)
 
-        return plot_values(values, self.latitudes, self.longitudes, **kwargs)
+    @abstractmethod
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Get the item at the specified index.
+
+        Parameters
+        ----------
+        n : FullIndex
+            Index to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            Retrieved item.
+        """
+
+    @abstractmethod
+    def __len__(self) -> int:
+        """Return the length of the dataset.
+
+        Returns
+        -------
+        int
+            The length of the dataset.
+        """
+
+    @property
+    @abstractmethod
+    def variables(self) -> List[str]:
+        """Return the list of variables in the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def frequency(self) -> datetime.timedelta:
+        """Return the frequency of the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def dates(self) -> NDArray[np.datetime64]:
+        """Return the dates in the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def resolution(self) -> str:
+        """Return the resolution of the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def name_to_index(self) -> Dict[str, int]:
+        """Return the mapping of variable names to indices."""
+        pass
+
+    @property
+    @abstractmethod
+    def shape(self) -> Shape:
+        """Return the shape of the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def field_shape(self) -> Shape:
+        """Return the shape of the fields in the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def dtype(self) -> np.dtype:
+        """Return the data type of the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def latitudes(self) -> NDArray[Any]:
+        """Return the latitudes in the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def longitudes(self) -> NDArray[Any]:
+        """Return the longitudes in the dataset."""
+        pass
+
+    @property
+    @abstractmethod
+    def variables_metadata(self) -> Dict[str, Any]:
+        """Return the metadata of the variables in the dataset."""
+        pass
+
+    @abstractmethod
+    @cached_property
+    def missing(self) -> Set[int]:
+        """Return the set of missing indices in the dataset."""
+        pass
+
+    @abstractmethod
+    @cached_property
+    def constant_fields(self) -> List[str]:
+        """Return the list of constant fields in the dataset."""
+        pass
+
+    @abstractmethod
+    @cached_property
+    def statistics(self) -> Dict[str, NDArray[Any]]:
+        """Return the statistics of the dataset."""
+        pass
+
+    @abstractmethod
+    def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+        """Return the tendencies of the statistics in the dataset.
+
+        Parameters
+        ----------
+        delta : datetime.timedelta, optional
+            The time delta for computing tendencies.
+
+        Returns
+        -------
+        dict
+            The tendencies.
+        """
+        pass
+
+    @abstractmethod
+    def source(self, index: int) -> Source:
+        """Return the source of the dataset at the specified index.
+
+        Parameters
+        ----------
+        index : int
+            The index.
+
+        Returns
+        -------
+        Source
+            The source.
+        """
+        pass
+
+    @abstractmethod
+    def tree(self) -> Node:
+        """Return the tree representation of the dataset.
+
+        Returns
+        -------
+        Node
+            The tree representation.
+        """
+        pass
+
+    @abstractmethod
+    def collect_input_sources(self, sources: List[Any]) -> None:
+        """Collect the input sources of the dataset.
+
+        Parameters
+        ----------
+        sources : list
+            The input sources.
+        """
+        pass
+
+    @abstractmethod
+    def get_dataset_names(self, names: Set[str]) -> None:
+        """Get the names of the datasets.
+
+        Parameters
+        ----------
+        names : set of str
+            The dataset names.
+        """
+        pass
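
The diff above (anemoi/datasets/data/dataset.py) makes Dataset an abstract base class with typed abstract properties, and plot() now delegates index resolution to the new public to_index() helper. A minimal usage sketch under stated assumptions: the dataset path and the variable name "2t" are placeholders, and only the to_index()/plot() signatures are taken from the diff.

# Minimal usage sketch of the new Dataset.to_index() helper in 0.5.17.
# The dataset path and the variable name "2t" are placeholders.
from anemoi.datasets import open_dataset

ds = open_dataset("/path/to/dataset.zarr")

# Resolve (date, variable, member) to an integer index tuple,
# then index the dataset directly with it.
idx = ds.to_index("2020-01-01", "2t")  # -> (date_index, variable_index, 0)
values = ds[idx]

# plot() now simply wraps to_index() and plot_values() internally.
ax = ds.plot("2020-01-01", "2t", member=0)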