anemoi-datasets 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +4 -1
- anemoi/datasets/__main__.py +12 -2
- anemoi/datasets/_version.py +9 -4
- anemoi/datasets/commands/cleanup.py +17 -2
- anemoi/datasets/commands/compare.py +18 -2
- anemoi/datasets/commands/copy.py +196 -14
- anemoi/datasets/commands/create.py +50 -7
- anemoi/datasets/commands/finalise-additions.py +17 -2
- anemoi/datasets/commands/finalise.py +17 -2
- anemoi/datasets/commands/init-additions.py +17 -2
- anemoi/datasets/commands/init.py +16 -2
- anemoi/datasets/commands/inspect.py +283 -62
- anemoi/datasets/commands/load-additions.py +16 -2
- anemoi/datasets/commands/load.py +16 -2
- anemoi/datasets/commands/patch.py +17 -2
- anemoi/datasets/commands/publish.py +17 -2
- anemoi/datasets/commands/scan.py +31 -3
- anemoi/datasets/compute/recentre.py +47 -11
- anemoi/datasets/create/__init__.py +612 -85
- anemoi/datasets/create/check.py +142 -20
- anemoi/datasets/create/chunks.py +64 -4
- anemoi/datasets/create/config.py +185 -21
- anemoi/datasets/create/filter.py +50 -0
- anemoi/datasets/create/filters/__init__.py +33 -0
- anemoi/datasets/create/filters/empty.py +37 -0
- anemoi/datasets/create/filters/legacy.py +93 -0
- anemoi/datasets/create/filters/noop.py +37 -0
- anemoi/datasets/create/filters/orog_to_z.py +58 -0
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
- anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
- anemoi/datasets/create/filters/rename.py +205 -0
- anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
- anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
- anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
- anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
- anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
- anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
- anemoi/datasets/create/filters/transform.py +53 -0
- anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
- anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
- anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
- anemoi/datasets/create/input/__init__.py +76 -5
- anemoi/datasets/create/input/action.py +149 -13
- anemoi/datasets/create/input/concat.py +81 -10
- anemoi/datasets/create/input/context.py +39 -4
- anemoi/datasets/create/input/data_sources.py +72 -6
- anemoi/datasets/create/input/empty.py +21 -3
- anemoi/datasets/create/input/filter.py +60 -12
- anemoi/datasets/create/input/function.py +154 -37
- anemoi/datasets/create/input/join.py +86 -14
- anemoi/datasets/create/input/misc.py +67 -17
- anemoi/datasets/create/input/pipe.py +33 -6
- anemoi/datasets/create/input/repeated_dates.py +189 -41
- anemoi/datasets/create/input/result.py +202 -87
- anemoi/datasets/create/input/step.py +119 -22
- anemoi/datasets/create/input/template.py +100 -13
- anemoi/datasets/create/input/trace.py +62 -7
- anemoi/datasets/create/patch.py +52 -4
- anemoi/datasets/create/persistent.py +134 -17
- anemoi/datasets/create/size.py +15 -1
- anemoi/datasets/create/source.py +51 -0
- anemoi/datasets/create/sources/__init__.py +36 -0
- anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
- anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
- anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
- anemoi/datasets/create/sources/empty.py +37 -0
- anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
- anemoi/datasets/create/sources/grib.py +297 -0
- anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
- anemoi/datasets/create/sources/legacy.py +93 -0
- anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
- anemoi/datasets/create/sources/netcdf.py +42 -0
- anemoi/datasets/create/sources/opendap.py +43 -0
- anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
- anemoi/datasets/create/sources/recentre.py +150 -0
- anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
- anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
- anemoi/datasets/create/sources/xarray.py +92 -0
- anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
- anemoi/datasets/create/sources/xarray_support/README.md +1 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
- anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
- anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
- anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
- anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
- anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
- anemoi/datasets/create/sources/xarray_support/time.py +391 -0
- anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
- anemoi/datasets/create/sources/xarray_zarr.py +41 -0
- anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
- anemoi/datasets/create/statistics/__init__.py +233 -44
- anemoi/datasets/create/statistics/summary.py +52 -6
- anemoi/datasets/create/testing.py +76 -0
- anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
- anemoi/datasets/create/utils.py +97 -6
- anemoi/datasets/create/writer.py +26 -4
- anemoi/datasets/create/zarr.py +170 -23
- anemoi/datasets/data/__init__.py +51 -4
- anemoi/datasets/data/complement.py +191 -40
- anemoi/datasets/data/concat.py +141 -16
- anemoi/datasets/data/dataset.py +552 -61
- anemoi/datasets/data/debug.py +197 -26
- anemoi/datasets/data/ensemble.py +93 -8
- anemoi/datasets/data/fill_missing.py +165 -18
- anemoi/datasets/data/forwards.py +428 -56
- anemoi/datasets/data/grids.py +323 -97
- anemoi/datasets/data/indexing.py +112 -19
- anemoi/datasets/data/interpolate.py +92 -12
- anemoi/datasets/data/join.py +158 -19
- anemoi/datasets/data/masked.py +129 -15
- anemoi/datasets/data/merge.py +137 -23
- anemoi/datasets/data/misc.py +172 -16
- anemoi/datasets/data/missing.py +233 -29
- anemoi/datasets/data/rescale.py +111 -10
- anemoi/datasets/data/select.py +168 -26
- anemoi/datasets/data/statistics.py +67 -6
- anemoi/datasets/data/stores.py +149 -64
- anemoi/datasets/data/subset.py +159 -25
- anemoi/datasets/data/unchecked.py +168 -57
- anemoi/datasets/data/xy.py +168 -25
- anemoi/datasets/dates/__init__.py +191 -16
- anemoi/datasets/dates/groups.py +189 -47
- anemoi/datasets/grids.py +270 -31
- anemoi/datasets/testing.py +28 -1
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +10 -7
- anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +1 -1
- anemoi/datasets/create/functions/__init__.py +0 -66
- anemoi/datasets/create/functions/filters/__init__.py +0 -9
- anemoi/datasets/create/functions/filters/empty.py +0 -17
- anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
- anemoi/datasets/create/functions/filters/rename.py +0 -79
- anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
- anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
- anemoi/datasets/create/functions/sources/empty.py +0 -15
- anemoi/datasets/create/functions/sources/grib.py +0 -150
- anemoi/datasets/create/functions/sources/netcdf.py +0 -15
- anemoi/datasets/create/functions/sources/opendap.py +0 -15
- anemoi/datasets/create/functions/sources/recentre.py +0 -60
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
- anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
- anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
- anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
- anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
- anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
- anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
- anemoi/datasets/utils/fields.py +0 -47
- anemoi_datasets-0.5.15.dist-info/RECORD +0 -129
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/check.py
CHANGED

@@ -8,25 +8,48 @@
 # nor does it submit to any jurisdiction.


+import datetime
 import logging
 import re
 import warnings
+from typing import Any
+from typing import Callable
+from typing import Optional
+from typing import Union

 import numpy as np
 from anemoi.utils.dates import frequency_to_string
+from numpy.typing import NDArray

 LOG = logging.getLogger(__name__)


 class DatasetName:
+    """Class to validate and parse dataset names according to naming conventions."""
+
     def __init__(
         self,
-        name,
-        resolution=None,
-        start_date=None,
-        end_date=None,
-        frequency=None,
+        name: str,
+        resolution: Optional[str] = None,
+        start_date: Optional[datetime.date] = None,
+        end_date: Optional[datetime.date] = None,
+        frequency: Optional[datetime.timedelta] = None,
     ):
+        """Initialize a DatasetName instance.
+
+        Parameters
+        ----------
+        name : str
+            The name of the dataset.
+        resolution : Optional[str], optional
+            The resolution of the dataset.
+        start_date : Optional[datetime.date], optional
+            The start date of the dataset.
+        end_date : Optional[datetime.date], optional
+            The end date of the dataset.
+        frequency : Optional[datetime.timedelta], optional
+            The frequency of the dataset.
+        """
         self.name = name
         self.parsed = self._parse(name)
         print("---------------")
@@ -45,19 +68,39 @@ class DatasetName:
         self.messages.append(f"{self} is parsed as :" + "/".join(f"{k}={v}" for k, v in self.parsed.items()))

     @property
-    def error_message(self):
+    def error_message(self) -> str:
+        """Generate an error message based on the collected messages."""
         out = " And ".join(self.messages)
         if out:
-            out
+            out[0].upper() + out[1:]
         return out

-    def raise_if_not_valid(self, print=print):
+    def raise_if_not_valid(self, print: Callable = print) -> None:
+        """Raise a ValueError if the dataset name is not valid.
+
+        Parameters
+        ----------
+        print : Callable
+            The function to use for printing messages.
+        """
         if self.messages:
             for m in self.messages:
                 print(m)
             raise ValueError(self.error_message)

-    def _parse(self, name):
+    def _parse(self, name: str) -> dict:
+        """Parse the dataset name into its components.
+
+        Parameters
+        ----------
+        name : str
+            The name of the dataset.
+
+        Returns
+        -------
+        dict
+            The parsed components of the dataset name.
+        """
         pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h|\d+m)-v(\d+)-?([a-zA-Z0-9-]+)?$"
         match = re.match(pattern, name)

@@ -81,10 +124,12 @@ class DatasetName:

         return parsed

-    def __str__(self):
+    def __str__(self) -> str:
+        """Return the string representation of the dataset name."""
         return self.name

-    def check_parsed(self):
+    def check_parsed(self) -> None:
+        """Check if the dataset name was parsed correctly."""
         if not self.parsed:
             self.messages.append(
                 f"the dataset name {self} does not follow naming convention. "
@@ -92,7 +137,14 @@ class DatasetName:
                 "https://anemoi-registry.readthedocs.io/en/latest/naming-conventions.html"
             )

-    def check_resolution(self, resolution):
+    def check_resolution(self, resolution: Optional[str]) -> None:
+        """Check if the resolution matches the expected format.
+
+        Parameters
+        ----------
+        resolution : str or None
+            The expected resolution.
+        """
         if self.parsed.get("resolution") and self.parsed["resolution"][0] not in "0123456789on":
             self.messages.append(
                 f"the resolution {self.parsed['resolution'] } should start "
@@ -105,42 +157,97 @@ class DatasetName:
         self._check_missing("resolution", resolution_str)
         self._check_mismatch("resolution", resolution_str)

-    def check_frequency(self, frequency):
+    def check_frequency(self, frequency: Optional[datetime.timedelta]) -> None:
+        """Check if the frequency matches the expected format.
+
+        Parameters
+        ----------
+        frequency : datetime.timedelta or None
+            The expected frequency.
+        """
         if frequency is None:
             return
         frequency_str = frequency_to_string(frequency)
         self._check_missing("frequency", frequency_str)
         self._check_mismatch("frequency", frequency_str)

-    def check_start_date(self, start_date):
+    def check_start_date(self, start_date: Optional[datetime.date]) -> None:
+        """Check if the start date matches the expected format.
+
+        Parameters
+        ----------
+        start_date : datetime.date or None
+            The expected start date.
+        """
         if start_date is None:
             return
         start_date_str = str(start_date.year)
         self._check_missing("start_date", start_date_str)
         self._check_mismatch("start_date", start_date_str)

-    def check_end_date(self, end_date):
+    def check_end_date(self, end_date: Optional[datetime.date]) -> None:
+        """Check if the end date matches the expected format.
+
+        Parameters
+        ----------
+        end_date : datetime.date or None
+            The expected end date.
+        """
         if end_date is None:
             return
         end_date_str = str(end_date.year)
         self._check_missing("end_date", end_date_str)
         self._check_mismatch("end_date", end_date_str)

-    def _check_missing(self, key, value):
+    def _check_missing(self, key: str, value: str) -> None:
+        """Check if a component is missing from the dataset name.
+
+        Parameters
+        ----------
+        key : str
+            The component key.
+        value : str
+            The expected value.
+        """
         if value not in self.name:
             self.messages.append(f"the {key} is {value}, but is missing in {self.name}.")

-    def _check_mismatch(self, key, value):
+    def _check_mismatch(self, key: str, value: str) -> None:
+        """Check if a component value mismatches the expected value.
+
+        Parameters
+        ----------
+        key : str
+            The component key.
+        value : str
+            The expected value.
+        """
         if self.parsed.get(key) and self.parsed[key] != value:
             self.messages.append(f"the {key} is {value}, but is {self.parsed[key]} in {self.name}.")


 class StatisticsValueError(ValueError):
-
+    """Custom error for statistics value issues."""

+    pass

-def check_data_values(arr, *, name: str, log=[], allow_nans=False):

+def check_data_values(
+    arr: NDArray[Any], *, name: str, log: list = [], allow_nans: Union[bool, list, set, tuple, dict] = False
+) -> None:
+    """Check the values in the data array for validity.
+
+    Parameters
+    ----------
+    arr : NDArray[Any]
+        The data array to check.
+    name : str
+        The name of the data array.
+    log : list, optional
+        A list to log messages.
+    allow_nans : bool or list or set or tuple or dict, optional
+        Whether to allow NaNs in the data array.
+    """
     shape = arr.shape

     if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
@@ -182,7 +289,22 @@ def check_data_values(arr, *, name: str, log=[], allow_nans=False):
         )


-def check_stats(minimum, maximum, mean, msg, **kwargs):
+def check_stats(minimum: float, maximum: float, mean: float, msg: str, **kwargs: Any) -> None:
+    """Check if the mean value is within the min/max interval.
+
+    Parameters
+    ----------
+    minimum : float
+        The minimum value.
+    maximum : float
+        The maximum value.
+    mean : float
+        The mean value.
+    msg : str
+        The message to include in the error.
+    **kwargs : Any
+        Additional keyword arguments.
+    """
     tolerance = (abs(minimum) + abs(maximum)) * 0.01
     if (mean - minimum < -tolerance) or (mean - minimum < -tolerance):
         raise StatisticsValueError(
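Note: the naming-convention validation above is driven by the regular expression in `DatasetName._parse`. Below is a minimal sketch of how a name following the convention splits against that pattern; the example name and the reading of the capture groups are illustrative, not taken from the package documentation.

```python
import re

# Pattern copied from DatasetName._parse in the diff above.
PATTERN = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h|\d+m)-v(\d+)-?([a-zA-Z0-9-]+)?$"

# Illustrative dataset name following the documented convention.
name = "aifs-ea-an-oper-0001-mars-o96-1979-2022-6h-v6"

match = re.match(PATTERN, name)
print(match.groups())
# ('aifs', 'ea-an-oper-0001', 'mars', 'o96', '1979', '2022', '6h', '6', None)
```

The two four-digit groups and the `\d+h|\d+m` group are the start year, end year and frequency that `check_start_date`, `check_end_date` and `check_frequency` compare against the values passed in.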
anemoi/datasets/create/chunks.py
CHANGED

@@ -9,6 +9,7 @@

 import logging
 import warnings
+from typing import Union

 LOG = logging.getLogger(__name__)

@@ -16,7 +17,35 @@ ALL = object()


 class ChunkFilter:
-
+    """A filter to determine which chunks to process based on the specified parts.
+
+    Attributes
+    ----------
+    total : int
+        The total number of chunks.
+    allowed : object or list
+        The chunks that are allowed to be processed.
+    """
+
+    def __init__(self, *, parts: Union[str, list], total: int):
+        """Initializes the ChunkFilter with the given parts and total number of chunks.
+
+        Parameters
+        ----------
+        parts : str or list
+            The parts to process, specified as 'i/n' or a list of such strings.
+        total : int
+            The total number of chunks.
+
+        Raises
+        ------
+        ValueError
+            If the parts format is invalid.
+        AssertionError
+            If the chunk number is invalid.
+        Warning
+            If the number of chunks is larger than the total number of chunks.
+        """
         self.total = total

         if isinstance(parts, list):
@@ -62,7 +91,24 @@ class ChunkFilter:

         self.allowed = parts

-    def __call__(self, i):
+    def __call__(self, i: int) -> bool:
+        """Checks if the given chunk number is allowed to be processed.
+
+        Parameters
+        ----------
+        i : int
+            The chunk number to check.
+
+        Returns
+        -------
+        bool
+            True if the chunk is allowed, False otherwise.
+
+        Raises
+        ------
+        AssertionError
+            If the chunk number is invalid.
+        """
         if i < 0 or i >= self.total:
             raise AssertionError(f"Invalid chunk number {i}. Must be between 0 and {self.total - 1}.")

@@ -70,10 +116,24 @@ class ChunkFilter:
             return True
         return i in self.allowed

-    def __iter__(self):
+    def __iter__(self) -> iter:
+        """Iterates over the allowed chunks.
+
+        Yields
+        ------
+        int
+            The next allowed chunk number.
+        """
         for i in range(self.total):
             if self(i):
                 yield i

-    def __len__(self):
+    def __len__(self) -> int:
+        """Returns the number of allowed chunks.
+
+        Returns
+        -------
+        int
+            The number of allowed chunks.
+        """
         return len([_ for _ in self])
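Note: the new `ChunkFilter` docstrings spell out the intended API: `parts` is given as `'i/n'` (or a list of such strings), and the filter then answers which chunk indices the current task should handle via `__call__`, `__iter__` and `__len__`. Below is a hypothetical usage sketch, assuming `'2/3'` means "the second of three parts"; the actual partitioning logic lives in the unchanged portion of the constructor.

```python
from anemoi.datasets.create.chunks import ChunkFilter

# Assumed semantics: this task handles the second of three parts of a 10-chunk job.
cf = ChunkFilter(parts="2/3", total=10)

print(len(cf))       # number of chunks assigned to this part
print(list(cf))      # chunk indices selected for this part (__iter__)
print(cf(0), cf(9))  # per-chunk membership tests (__call__)
```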
anemoi/datasets/create/config.py
CHANGED

@@ -11,6 +11,9 @@ import datetime
 import logging
 import os
 from copy import deepcopy
+from typing import Any
+from typing import Optional
+from typing import Union

 import yaml
 from anemoi.utils.config import DotDict
@@ -22,13 +25,41 @@ from anemoi.datasets.dates.groups import Groups
 LOG = logging.getLogger(__name__)


-def _get_first_key_if_dict(x):
+def _get_first_key_if_dict(x: Union[str, dict]) -> str:
+    """Returns the first key if the input is a dictionary, otherwise returns the input string.
+
+    Parameters
+    ----------
+    x : str or dict
+        Input string or dictionary.
+
+    Returns
+    -------
+    str
+        The first key if input is a dictionary, otherwise the input string.
+    """
     if isinstance(x, str):
         return x
     return list(x.keys())[0]


-def ensure_element_in_list(lst, elt, index):
+def ensure_element_in_list(lst: list, elt: str, index: int) -> list:
+    """Ensures that a specified element is present at a given index in a list.
+
+    Parameters
+    ----------
+    lst : list
+        The list to check.
+    elt : str
+        The element to ensure is in the list.
+    index : int
+        The index at which the element should be present.
+
+    Returns
+    -------
+    list
+        The modified list with the element at the specified index.
+    """
     if elt in lst:
         assert lst[index] == elt
         return lst
@@ -41,7 +72,23 @@ def ensure_element_in_list(lst, elt, index):
     return lst[:index] + [elt] + lst[index:]


-def check_dict_value_and_set(dic, key, value):
+def check_dict_value_and_set(dic: dict, key: str, value: Any) -> None:
+    """Checks if a dictionary contains a specific key-value pair and sets it if not present.
+
+    Parameters
+    ----------
+    dic : dict
+        The dictionary to check.
+    key : str
+        The key to check in the dictionary.
+    value : Any
+        The value to set if the key is not present.
+
+    Raises
+    ------
+    ValueError
+        If the key is present but with a different value.
+    """
     if key in dic:
         if dic[key] == value:
             return
@@ -50,7 +97,19 @@ def check_dict_value_and_set(dic, key, value):
     dic[key] = value


-def resolve_includes(config):
+def resolve_includes(config: Union[dict, list]) -> Union[dict, list]:
+    """Resolves '<<' includes in a configuration dictionary or list.
+
+    Parameters
+    ----------
+    config : dict or list
+        The configuration to resolve includes for.
+
+    Returns
+    -------
+    dict or list
+        The configuration with includes resolved.
+    """
     if isinstance(config, list):
         return [resolve_includes(c) for c in config]
     if isinstance(config, dict):
@@ -62,7 +121,18 @@ def resolve_includes(config):


 class Config(DotDict):
-
+    """Configuration class that extends DotDict to handle configuration loading and processing."""
+
+    def __init__(self, config: Optional[Union[str, dict]] = None, **kwargs):
+        """Initializes the Config object.
+
+        Parameters
+        ----------
+        config : str or dict, optional
+            Path to the configuration file or a dictionary. Defaults to None.
+        **kwargs
+            Additional keyword arguments to update the configuration.
+        """
         if isinstance(config, str):
             self.config_path = os.path.realpath(config)
             config = load_any_dict_format(config)
@@ -74,7 +144,18 @@ class Config(DotDict):


 class OutputSpecs:
-
+    """Class to handle output specifications for datasets."""
+
+    def __init__(self, config: Config, parent: Any):
+        """Initializes the OutputSpecs object.
+
+        Parameters
+        ----------
+        config : Config
+            The configuration object.
+        parent : Any
+            The parent object.
+        """
         self.config = config
         if "order_by" in config:
             assert isinstance(config.order_by, dict), config.order_by
@@ -82,15 +163,28 @@ class OutputSpecs:
         self.parent = parent

     @property
-    def dtype(self):
+    def dtype(self) -> str:
+        """Returns the data type for the output."""
         return self.config.dtype

     @property
-    def order_by_as_list(self):
-
+    def order_by_as_list(self) -> list[dict]:
+        """Returns the order_by configuration as a list of dictionaries."""
         return [{k: v} for k, v in self.config.order_by.items()]

-    def get_chunking(self, coords):
+    def get_chunking(self, coords: dict) -> tuple:
+        """Returns the chunking configuration based on coordinates.
+
+        Parameters
+        ----------
+        coords : dict
+            The coordinates dictionary.
+
+        Returns
+        -------
+        tuple
+            The chunking configuration.
+        """
         user = deepcopy(self.config.chunking)
         chunks = []
         for k, v in coords.items():
@@ -105,25 +199,41 @@ class OutputSpecs:
         return tuple(chunks)

     @property
-    def order_by(self):
+    def order_by(self) -> dict:
+        """Returns the order_by configuration."""
         return self.config.order_by

     @property
-    def remapping(self):
+    def remapping(self) -> dict:
+        """Returns the remapping configuration."""
         return self.config.remapping

     @property
-    def flatten_grid(self):
+    def flatten_grid(self) -> bool:
+        """Returns whether the grid should be flattened."""
         return self.config.flatten_grid

     @property
-    def statistics(self):
+    def statistics(self) -> str:
+        """Returns the statistics configuration."""
         return self.config.statistics


 class LoadersConfig(Config):
-
-
+    """Configuration class for dataset loaders."""
+
+    def __init__(self, config: dict, *args, **kwargs):
+        """Initializes the LoadersConfig object.
+
+        Parameters
+        ----------
+        config : dict
+            The configuration dictionary.
+        *args
+            Additional positional arguments.
+        **kwargs
+            Additional keyword arguments.
+        """
         super().__init__(config, *args, **kwargs)

         # TODO: should use a json schema to validate the config
@@ -178,11 +288,30 @@ class LoadersConfig(Config):

         self.reading_chunks = self.get("reading_chunks")

-    def get_serialisable_dict(self):
+    def get_serialisable_dict(self) -> dict:
+        """Returns a serializable dictionary representation of the configuration.
+
+        Returns
+        -------
+        dict
+            The serializable dictionary.
+        """
         return _prepare_serialisation(self)


-def _prepare_serialisation(o):
+def _prepare_serialisation(o: Any) -> Any:
+    """Prepares an object for serialization.
+
+    Parameters
+    ----------
+    o : Any
+        The object to prepare.
+
+    Returns
+    -------
+    Any
+        The prepared object.
+    """
     if isinstance(o, dict):
         dic = {}
         for k, v in o.items():
@@ -212,7 +341,14 @@ def _prepare_serialisation(o):
     return str(o)


-def set_to_test_mode(cfg):
+def set_to_test_mode(cfg: dict) -> None:
+    """Modifies the configuration to run in test mode.
+
+    Parameters
+    ----------
+    cfg : dict
+        The configuration dictionary.
+    """
     NUMBER_OF_DATES = 4

     LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
@@ -251,7 +387,21 @@ def set_to_test_mode(cfg):
     set_element_to_test(cfg)


-def loader_config(config, is_test=False):
+def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
+    """Loads and validates the configuration for dataset loaders.
+
+    Parameters
+    ----------
+    config : dict
+        The configuration dictionary.
+    is_test : bool, optional
+        Whether to run in test mode. Defaults to False.
+
+    Returns
+    -------
+    LoadersConfig
+        The validated configuration object.
+    """
     config = Config(config)
     if is_test:
         set_to_test_mode(config)
@@ -273,5 +423,19 @@ def loader_config(config, is_test=False):
     return copy


-def build_output(*args, **kwargs):
+def build_output(*args, **kwargs) -> OutputSpecs:
+    """Builds the output specifications.
+
+    Parameters
+    ----------
+    *args
+        Additional positional arguments.
+    **kwargs
+        Additional keyword arguments.
+
+    Returns
+    -------
+    OutputSpecs
+        The output specifications object.
+    """
     return OutputSpecs(*args, **kwargs)
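Note: most of the visible `config.py` changes are type hints and numpy-style docstrings rather than behaviour changes. The docstring added to `check_dict_value_and_set` makes its contract explicit; below is a small sketch of that contract, using made-up configuration keys.

```python
from anemoi.datasets.create.config import check_dict_value_and_set

cfg = {"flatten_grid": True}

# Key absent: the value is set.
check_dict_value_and_set(cfg, "ensemble_dimension", 2)

# Key present with the same value: no-op.
check_dict_value_and_set(cfg, "flatten_grid", True)

# Key present with a different value: per the new docstring, this raises ValueError.
try:
    check_dict_value_and_set(cfg, "flatten_grid", False)
except ValueError as exc:
    print(exc)
```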