anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/create/__init__.py +46 -13
- anemoi/datasets/create/config.py +52 -53
- anemoi/datasets/create/input/__init__.py +43 -63
- anemoi/datasets/create/input/action.py +296 -236
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +2 -1
- anemoi/datasets/create/input/misc.py +0 -71
- anemoi/datasets/create/input/repeated_dates.py +0 -114
- anemoi/datasets/create/input/result/__init__.py +17 -0
- anemoi/datasets/create/input/{result.py → result/field.py} +10 -92
- anemoi/datasets/create/sources/accumulate.py +517 -0
- anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
- anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
- anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +149 -0
- anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -38
- anemoi/datasets/create/sources/empty.py +26 -22
- anemoi/datasets/create/sources/forcings.py +29 -28
- anemoi/datasets/create/sources/grib.py +92 -72
- anemoi/datasets/create/sources/grib_index.py +102 -54
- anemoi/datasets/create/sources/hindcasts.py +56 -55
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +159 -154
- anemoi/datasets/create/sources/netcdf.py +28 -24
- anemoi/datasets/create/sources/opendap.py +28 -24
- anemoi/datasets/create/sources/recentre.py +42 -41
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -48
- anemoi/datasets/create/sources/xarray_support/__init__.py +30 -24
- anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
- anemoi/datasets/create/sources/xarray_support/field.py +4 -4
- anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
- anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
- anemoi/datasets/create/sources/xarray_zarr.py +28 -24
- anemoi/datasets/create/sources/zenodo.py +43 -39
- anemoi/datasets/create/utils.py +0 -42
- anemoi/datasets/data/complement.py +26 -17
- anemoi/datasets/data/dataset.py +12 -0
- anemoi/datasets/data/grids.py +0 -152
- anemoi/datasets/data/masked.py +74 -13
- anemoi/datasets/data/missing.py +5 -0
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/stores.py +7 -9
- anemoi/datasets/dates/__init__.py +2 -0
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +1 -178
- anemoi/datasets/schemas/recipe.json +131 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/METADATA +9 -6
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/RECORD +59 -57
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/filter.py +0 -47
- anemoi/datasets/create/input/concat.py +0 -161
- anemoi/datasets/create/input/context.py +0 -86
- anemoi/datasets/create/input/empty.py +0 -53
- anemoi/datasets/create/input/filter.py +0 -117
- anemoi/datasets/create/input/function.py +0 -232
- anemoi/datasets/create/input/join.py +0 -129
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -173
- anemoi/datasets/create/input/template.py +0 -161
- anemoi/datasets/create/sources/accumulations.py +0 -1062
- anemoi/datasets/create/sources/accumulations2.py +0 -647
- anemoi/datasets/create/sources/tendencies.py +0 -198
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.29.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/utils.py
CHANGED

@@ -96,48 +96,6 @@ def to_datetime(*args: Any, **kwargs: Any) -> datetime.datetime:
     return to_datetime_(*args, **kwargs)
 
 
-def make_list_int(value: str | list | tuple | int) -> list[int]:
-    """Convert a string, list, tuple, or integer to a list of integers.
-
-    Parameters
-    ----------
-    value : str or list or tuple or int
-        The value to convert.
-
-    Returns
-    -------
-    list[int]
-        A list of integers.
-
-    Raises
-    ------
-    ValueError
-        If the value cannot be converted to a list of integers.
-    """
-    # Convert a string like "1/2/3" or "1/to/3" or "1/to/10/by/2" to a list of integers.
-    # Moved to anemoi.utils.humanize
-    # replace with from anemoi.utils.humanize import make_list_int
-    # when anemoi-utils is released and pyproject.toml is updated
-    if isinstance(value, str):
-        if "/" not in value:
-            return [value]
-        bits = value.split("/")
-        if len(bits) == 3 and bits[1].lower() == "to":
-            value = list(range(int(bits[0]), int(bits[2]) + 1, 1))
-
-        elif len(bits) == 5 and bits[1].lower() == "to" and bits[3].lower() == "by":
-            value = list(range(int(bits[0]), int(bits[2]) + int(bits[4]), int(bits[4])))
-
-    if isinstance(value, list):
-        return value
-    if isinstance(value, tuple):
-        return value
-    if isinstance(value, int):
-        return [value]
-
-    raise ValueError(f"Cannot make list from {value}")
-
-
 def normalize_and_check_dates(
     dates: list[datetime.datetime],
     start: datetime.datetime,
anemoi/datasets/data/complement.py
CHANGED

@@ -293,21 +293,29 @@ class ComplementNearest(Complement):
         index, previous = update_tuple(index, variable_index, slice(None))
         source_index = [self._source.name_to_index[x] for x in self.variables[previous]]
         source_data = self._source[index[0], source_index, index[2], ...]
-        […15 removed lines whose content is not preserved in this rendering]
+        if any(self._nearest_grid_points >= source_data.shape[-1]):
+            target_shape = source_data.shape[:-1] + self._target.shape[-1:]
+            target_data = np.full(target_shape, np.nan, dtype=self._target.dtype)
+            cond = self._nearest_grid_points < source_data.shape[-1]
+            reachable = np.where(cond)[0]
+            nearest_reachable = self._nearest_grid_points[cond]
+            target_data[..., reachable] = source_data[..., nearest_reachable]
+            result = target_data[..., index[3]]
+        else:
+            target_data = source_data[..., self._nearest_grid_points]
+            epsilon = 1e-8  # prevent division by zero
+            weights = 1.0 / (self._distances + epsilon)
+            weights = weights.astype(target_data.dtype)
+            weights /= weights.sum(axis=1, keepdims=True)  # normalize
+
+            # Reshape weights to broadcast correctly
+            # Add leading singleton dimensions so it matches target_data shape
+            while weights.ndim < target_data.ndim:
+                weights = np.expand_dims(weights, axis=0)
+
+            # Compute weighted average along the last dimension
+            final_point = np.sum(target_data * weights, axis=-1)
+            result = final_point[..., index[3]]
 
         return apply_index_to_slices_changes(result, changes)
 

@@ -353,8 +361,9 @@ def complement_factory(args: tuple, kwargs: dict) -> Dataset:
     }[interpolation]
 
     if interpolation == "nearest":
-        k = kwargs.pop("k",
-        […1 removed line whose content is not preserved in this rendering]
+        k = kwargs.pop("k", 1)
+        max_distance = kwargs.pop("max_distance", None)
+        complement = Class(target=target, source=source, k=k, max_distance=max_distance)._subset(**kwargs)
 
     else:
         complement = Class(target=target, source=source)._subset(**kwargs)
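For orientation, a minimal usage sketch of the options wired in above, assuming the existing open_dataset(complement=..., source=...) interface; the dataset paths are placeholders, not from this diff:

    from anemoi.datasets import open_dataset

    ds = open_dataset(
        complement="lam.zarr",      # placeholder: dataset missing some variables
        source="global.zarr",       # placeholder: dataset supplying them
        interpolation="nearest",
        k=4,                        # inverse-distance average over the 4 nearest source points
        max_distance=None,          # forwarded to ComplementNearest (new in this release)
    )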
anemoi/datasets/data/dataset.py
CHANGED

@@ -245,6 +245,12 @@ class Dataset(ABC, Sized):
 
             return Statistics(self, open_dataset(statistics))._subset(**kwargs).mutate()
 
+        if "mask" in kwargs:
+            from .masked import Masking
+
+            mask_file = kwargs.pop("mask")
+            return Masking(self, mask_file)._subset(**kwargs).mutate()
+
         # Note: trim_edge should go before thinning
         if "trim_edge" in kwargs:
            from .masked import TrimEdge

@@ -293,6 +299,12 @@ class Dataset(ABC, Sized):
         if skip_missing_dates:
             return SkipMissingDates(self, expected_access)._subset(**kwargs).mutate()
 
+        if "rolling_average" in kwargs:
+            from .rolling_average import RollingAverage
+
+            rolling_average = kwargs.pop("rolling_average")
+            return RollingAverage(self, rolling_average)._subset(**kwargs).mutate()
+
         if "interpolate_frequency" in kwargs:
             from .interpolate import InterpolateFrequency
 
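A hedged sketch of how the two new _subset keywords are reached from open_dataset; the file names and window value are illustrative:

    from anemoi.datasets import open_dataset

    # "mask" wraps the dataset in the new Masking class (boolean .npy file)
    ds = open_dataset("dataset.zarr", mask="keep.npy")

    # "rolling_average" wraps it in the new RollingAverage class
    ds = open_dataset("dataset.zarr", rolling_average=(-2, 2, "freq"))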
anemoi/datasets/data/grids.py
CHANGED

@@ -21,167 +21,15 @@ from .dataset import FullIndex
 from .dataset import Shape
 from .dataset import TupleIndex
 from .debug import Node
-from .debug import debug_indexing
-from .forwards import Combined
 from .forwards import GivenAxis
 from .indexing import apply_index_to_slices_changes
-from .indexing import expand_list_indexing
 from .indexing import index_to_slices
-from .indexing import length_to_slices
-from .indexing import update_tuple
 from .misc import _auto_adjust
 from .misc import _open
 
 LOG = logging.getLogger(__name__)
 
 
-class Concat(Combined):
-    """A class to represent concatenated datasets."""
-
-    def __len__(self) -> int:
-        """Returns the total length of the concatenated datasets.
-
-        Returns
-        -------
-        int
-            Total length of the concatenated datasets.
-        """
-        return sum(len(i) for i in self.datasets)
-
-    @debug_indexing
-    @expand_list_indexing
-    def _get_tuple(self, index: TupleIndex) -> NDArray[Any]:
-        """Retrieves a tuple of data from the concatenated datasets based on the given index.
-
-        Parameters
-        ----------
-        index : TupleIndex
-            Index specifying the data to retrieve.
-
-        Returns
-        -------
-        NDArray[Any]
-            Concatenated data array from the specified index.
-        """
-        index, changes = index_to_slices(index, self.shape)
-        # print(index, changes)
-        lengths = [d.shape[0] for d in self.datasets]
-        slices = length_to_slices(index[0], lengths)
-        # print("slies", slices)
-        result = [d[update_tuple(index, 0, i)[0]] for (d, i) in zip(self.datasets, slices) if i is not None]
-        result = np.concatenate(result, axis=0)
-        return apply_index_to_slices_changes(result, changes)
-
-    @debug_indexing
-    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
-        """Retrieves data from the concatenated datasets based on the given index.
-
-        Parameters
-        ----------
-        n : FullIndex
-            Index specifying the data to retrieve.
-
-        Returns
-        -------
-        NDArray[Any]
-            Data array from the concatenated datasets based on the index.
-        """
-        if isinstance(n, tuple):
-            return self._get_tuple(n)
-
-        if isinstance(n, slice):
-            return self._get_slice(n)
-
-        # TODO: optimize
-        k = 0
-        while n >= self.datasets[k]._len:
-            n -= self.datasets[k]._len
-            k += 1
-        return self.datasets[k][n]
-
-    @debug_indexing
-    def _get_slice(self, s: slice) -> NDArray[Any]:
-        """Retrieves a slice of data from the concatenated datasets.
-
-        Parameters
-        ----------
-        s : slice
-            Slice object specifying the range of data to retrieve.
-
-        Returns
-        -------
-        NDArray[Any]
-            Concatenated data array from the specified slice.
-        """
-        result = []
-
-        lengths = [d.shape[0] for d in self.datasets]
-        slices = length_to_slices(s, lengths)
-
-        result = [d[i] for (d, i) in zip(self.datasets, slices) if i is not None]
-
-        return np.concatenate(result)
-
-    def check_compatibility(self, d1: Dataset, d2: Dataset) -> None:
-        """Check the compatibility of two datasets for concatenation.
-
-        Parameters
-        ----------
-        d1 : Dataset
-            The first dataset.
-        d2 : Dataset
-            The second dataset.
-        """
-        super().check_compatibility(d1, d2)
-        self.check_same_sub_shapes(d1, d2, drop_axis=0)
-
-    def check_same_lengths(self, d1: Dataset, d2: Dataset) -> None:
-        """Check if the lengths of two datasets are the same.
-
-        Parameters
-        ----------
-        d1 : Dataset
-            The first dataset.
-        d2 : Dataset
-            The second dataset.
-        """
-        # Turned off because we are concatenating along the first axis
-        pass
-
-    def check_same_dates(self, d1: Dataset, d2: Dataset) -> None:
-        """Check if the dates of two datasets are the same.
-
-        Parameters
-        ----------
-        d1 : Dataset
-            The first dataset.
-        d2 : Dataset
-            The second dataset.
-        """
-        # Turned off because we are concatenating along the dates axis
-        pass
-
-    @property
-    def dates(self) -> NDArray[np.datetime64]:
-        """Returns the concatenated dates of all datasets."""
-        return np.concatenate([d.dates for d in self.datasets])
-
-    @property
-    def shape(self) -> Shape:
-        """Returns the shape of the concatenated datasets."""
-        return (len(self),) + self.datasets[0].shape[1:]
-
-    def tree(self) -> Node:
-        """Generates a hierarchical tree structure for the concatenated datasets.
-
-        Returns
-        -------
-        Node
-            A Node object representing the concatenated datasets.
-        """
-        return Node(self, [d.tree() for d in self.datasets])
-
-
 class GridsBase(GivenAxis):
     """A base class for handling grids in datasets."""
 
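The removed Concat class split a request along the dates axis with length_to_slices and concatenated the per-dataset results; a small sketch of that scheme, with illustrative lengths:

    from anemoi.datasets.data.indexing import length_to_slices

    lengths = [10, 10, 5]                    # member dataset lengths along axis 0
    slices = length_to_slices(slice(8, 13), lengths)
    # expected: [slice(8, 10), slice(0, 3), None] -- two rows from the first
    # dataset, three from the second, none from the third, then np.concatenate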
anemoi/datasets/data/masked.py
CHANGED

@@ -10,6 +10,7 @@
 
 import logging
 from functools import cached_property
+from pathlib import Path
 from typing import Any
 
 import numpy as np

@@ -66,6 +67,12 @@ class Masked(Forwards):
         """Get the masked longitudes."""
         return self.forward.longitudes[self.mask]
 
+    @property
+    def grids(self) -> TupleIndex:
+        """Returns the number of grid points after masking"""
+        grids = np.sum(self.mask)
+        return (grids,)
+
     @debug_indexing
     def __getitem__(self, index: FullIndex) -> NDArray[Any]:
         """Get the masked data at the specified index.

@@ -150,19 +157,9 @@ class Thinning(Masked):
             if len(shape) != 2:
                 raise ValueError("Thinning only works latitude/longitude fields")
 
-            […4 removed lines whose content is not preserved in this rendering]
-            latitudes = forward_latitudes.reshape(shape)
-            longitudes = forward_longitudes.reshape(shape)
-            latitudes = latitudes[::thinning, ::thinning].flatten()
-            longitudes = longitudes[::thinning, ::thinning].flatten()
-
-            # TODO: This is not very efficient
-
-            mask = [lat in latitudes and lon in longitudes for lat, lon in zip(forward_latitudes, forward_longitudes)]
-            mask = np.array(mask, dtype=bool)
+            mask = np.full(shape, False, dtype=bool)
+            mask[::thinning, ::thinning] = True
+            mask = mask.flatten()
         else:
             mask = None
 

@@ -200,6 +197,70 @@ class Thinning(Masked):
         """
         return dict(thinning=self.thinning, method=self.method)
 
+    @property
+    def field_shape(self) -> Shape:
+        """Returns the field shape of the dataset."""
+        if self.thinning is None:
+            return self.forward.field_shape
+        x, y = self.forward.field_shape
+        x = (x + self.thinning - 1) // self.thinning
+        y = (y + self.thinning - 1) // self.thinning
+        return x, y
+
+
+class Masking(Masked):
+    """A class that applies a precomputed boolean mask from a .npy file."""
+
+    def __init__(self, forward: Dataset, mask_file: str) -> None:
+        """Initialize the Masking class.
+
+        Parameters
+        ----------
+        forward : Dataset
+            The dataset to be masked.
+        mask_file : str
+            Path to a .npy file containing a boolean mask of same shape as fields.
+        """
+        self.mask_file = mask_file
+
+        # Check path
+        if not Path(self.mask_file).exists():
+            raise FileNotFoundError(f"Mask file not found: {self.mask_file}")
+        # Load mask
+        try:
+            mask = np.load(self.mask_file)
+        except Exception as e:
+            raise ValueError(f"Could not load data from {mask_file}: {e}")
+
+        if mask.dtype != bool:
+            raise ValueError(f"Mask file {mask_file} does not contain boolean values.")
+        if mask.shape != forward.field_shape:
+            raise ValueError(f"Mask length {mask.shape} does not match field size {forward.field_shape}.")
+        if sum(mask) == 0:
+            LOG.warning(f"Mask in {mask_file} eliminates all points in field.")
+
+        super().__init__(forward, mask)
+
+    def tree(self) -> Node:
+        """Get the tree representation of the dataset.
+
+        Returns
+        -------
+        Node
+            The tree representation of the dataset.
+        """
+        return Node(self, [self.forward.tree()], mask_file=self.mask_file)
+
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
+        """Get the metadata specific to the Masking subclass.
+
+        Returns
+        -------
+        Dict[str, Any]
+            The metadata specific to the Masking subclass.
+        """
+        return dict(mask_file=self.mask_file)
+
 
 class Cropping(Masked):
     """A class to represent a cropped dataset."""
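A hedged sketch of producing a mask file that passes the checks in Masking.__init__: a boolean array with the dataset's field_shape, saved with np.save; the paths are placeholders:

    import numpy as np
    from anemoi.datasets import open_dataset

    ds = open_dataset("dataset.zarr")            # placeholder path
    mask = np.zeros(ds.field_shape, dtype=bool)
    mask[10:20, 30:40] = True                    # keep only this patch of grid points
    np.save("keep.npy", mask)

    masked = open_dataset("dataset.zarr", mask="keep.npy")
    # masked.grids -> (mask.sum(),) via the new Masked.grids property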
anemoi/datasets/data/missing.py
CHANGED

@@ -440,3 +440,8 @@ class MissingDataset(Forwards):
             Metadata specific to the subclass.
         """
         return {"start": self.start, "end": self.end}
+
+    @property
+    def shape(self) -> tuple[int, ...]:
+        """Return the shape of the dataset."""
+        return (len(self),) + self.forward.shape[1:]
anemoi/datasets/data/rolling_average.py
ADDED

@@ -0,0 +1,141 @@
+# (C) Copyright 2025 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+import logging
+from functools import cached_property
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+
+from anemoi.datasets.data.indexing import expand_list_indexing
+
+from .dataset import Dataset
+from .dataset import FullIndex
+from .debug import Node
+from .debug import debug_indexing
+from .forwards import Forwards
+
+LOG = logging.getLogger(__name__)
+
+
+class RollingAverage(Forwards):
+    """A class to represent a dataset with interpolated frequency."""
+
+    def __init__(self, dataset: Dataset, window: str | tuple[int, int, str]) -> None:
+        """Initialize the RollingAverage class.
+
+        Parameters
+        ----------
+        dataset : Dataset
+            The dataset to be averaged with a rolling window.
+        window : (int, int, str)
+            The rolling average window (start, end, 'freq').
+            'freq' means the window is in number of time steps in the dataset.
+            Both start and end are inclusive, i.e. window = (-2, 2, 'freq') means a window of 5 time steps.
+            For now, only 'freq' is supported, in the future other units may be supported.
+            Windows such as "[-2h, +2h]" are not supported yet.
+        """
+        super().__init__(dataset)
+        if not (isinstance(window, (list, tuple)) and len(window) == 3):
+            raise ValueError(f"Window must be (int, int, str), got {window}")
+        if not isinstance(window[0], int) or not isinstance(window[1], int) or not isinstance(window[2], str):
+            raise ValueError(f"Window must be (int, int, str), got {window}")
+        if window[2] not in ["freq", "frequency"]:
+            raise NotImplementedError(f"Window must be (int, int, 'freq'), got {window}")
+
+        # window = (0, 0, 'freq') means no change
+        self.i_start = -window[0]
+        self.i_end = window[1] + 1
+        if self.i_start <= 0:
+            raise ValueError(f"Window start must be negative, got {window}")
+        if self.i_end <= 0:
+            raise ValueError(f"Window end must be positive, got {window}")
+
+        self.window_str = f"-{self.i_start}-to-{self.i_end}"
+
+    @property
+    def shape(self):
+        shape = list(self.forward.shape)
+        shape[0] = len(self)
+        return tuple(shape)
+
+    @debug_indexing
+    @expand_list_indexing
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        def f(array):
+            return np.nanmean(array, axis=0)
+
+        if isinstance(n, slice):
+            n = (n,)
+
+        if isinstance(n, tuple):
+            first = n[0]
+            if len(n) > 1:
+                rest = n[1:]
+            else:
+                rest = ()
+
+            if isinstance(first, int):
+                slice_ = slice(first, first + self.i_start + self.i_end)
+                data = self.forward[(slice_,) + rest]
+                return f(data)
+
+            if isinstance(first, slice):
+                first = list(range(first.start or 0, first.stop or len(self), first.step or 1))
+
+            if isinstance(first, (list, tuple)):
+                first = [i if i >= 0 else len(self) + i for i in first]
+                if any(i >= len(self) for i in first):
+                    raise IndexError(f"Index out of range: {first}")
+                slices = [slice(i, i + self.i_start + self.i_end) for i in first]
+                data = [self.forward[(slice_,) + rest] for slice_ in slices]
+                res = [f(d) for d in data]
+                return np.array(res)
+
+            assert False, f"Expected int, slice, list or tuple as first element of tuple, got {type(first)}"
+
+        assert isinstance(n, int), f"Expected int, slice, tuple, got {type(n)}"
+
+        if n < 0:
+            n = len(self) + n
+        if n >= len(self):
+            raise IndexError(f"Index out of range: {n}")
+
+        slice_ = slice(n, n + self.i_start + self.i_end)
+        data = self.forward[slice_]
+        return f(data)
+
+    def __len__(self) -> int:
+        return len(self.forward) - (self.i_end + self.i_start - 1)
+
+    @cached_property
+    def dates(self) -> NDArray[np.datetime64]:
+        """Get the interpolated dates."""
+        dates = self.forward.dates
+        return dates[self.i_start : len(dates) - self.i_end + 1]
+
+    def tree(self) -> Node:
+        return Node(self, [self.forward.tree()], window=self.window_str)
+
+    @cached_property
+    def missing(self) -> set[int]:
+        """Get the missing data indices."""
+        result = []
+
+        for i in self.forward.missing:
+            for j in range(0, self.i_end + self.i_start):
+                result.append(i + j)
+
+        result = {x for x in result if x < self._len}
+        return result
+
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
+        return {}
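A worked example of the window arithmetic in __init__ and __getitem__ above, using the (-2, 2, 'freq') window from the docstring:

    window = (-2, 2, "freq")
    i_start, i_end = -window[0], window[1] + 1   # 2 and 3, both must be > 0
    assert i_start + i_end == 5                  # item n averages forward[n : n + 5]
    # len(RollingAverage(ds, window)) == len(ds) - (i_end + i_start - 1) == len(ds) - 4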
anemoi/datasets/data/stores.py
CHANGED

@@ -85,22 +85,20 @@ class S3Store(ReadOnlyStore):
     options using the anemoi configs.
     """
 
-    def __init__(self, url: str
-        """Initialize the S3Store with a URL
-        from anemoi.utils.remote.s3 import s3_client
+    def __init__(self, url: str) -> None:
+        """Initialize the S3Store with a URL."""
 
-
-        self.s3 = s3_client(self.bucket, region=region)
+        self.url = url
 
     def __getitem__(self, key: str) -> bytes:
         """Retrieve an item from the store."""
+        from anemoi.utils.remote.s3 import get_object
+
         try:
-
-        except
+            return get_object(os.path.join(self.url, key))
+        except FileNotFoundError:
             raise KeyError(key)
 
-        return response["Body"].read()
-
 
 class DebugStore(ReadOnlyStore):
     """A store to debug the zarr loading."""
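The rewrite drops the eager client setup in favour of one get_object call per key; a sketch of the resulting access pattern, with a placeholder URL:

    from anemoi.datasets.data.stores import S3Store

    store = S3Store("s3://my-bucket/dataset.zarr")      # placeholder URL
    chunk = store["data/0.0.0"]
    # -> get_object("s3://my-bucket/dataset.zarr/data/0.0.0"); raises KeyError
    #    when the object is missing (FileNotFoundError is translated above)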
anemoi/datasets/dates/__init__.py
CHANGED

@@ -58,6 +58,8 @@ def extend(x: str | list[Any] | tuple[Any, ...]) -> Iterator[datetime.datetime]:
 class DatesProvider:
     """Base class for date generation.
 
+    Examples
+    --------
     >>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-02 00:00", "frequency": "1d"}).values
     [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 2, 0, 0)]
 
anemoi/datasets/dumper.py
ADDED

@@ -0,0 +1,76 @@
+# (C) Copyright 2025 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import datetime
+import io
+import logging
+
+import ruamel.yaml
+
+LOG = logging.getLogger(__name__)
+
+
+def represent_date(dumper, data):
+
+    if isinstance(data, datetime.datetime):
+        if data.tzinfo is None:
+            data = data.replace(tzinfo=datetime.timezone.utc)
+        data = data.astimezone(datetime.timezone.utc)
+        iso_str = data.replace(tzinfo=None).isoformat(timespec="seconds") + "Z"
+    else:
+        iso_str = data.isoformat()
+
+    return dumper.represent_scalar("tag:yaml.org,2002:timestamp", iso_str)
+
+
+# --- Represent multiline strings with | style ---
+def represent_multiline_str(dumper, data):
+    if "\n" in data:
+        return dumper.represent_scalar("tag:yaml.org,2002:str", data.strip(), style="|")
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+
+# --- Represent short lists inline (flow style) ---
+def represent_inline_list(dumper, data):
+
+    if not all(isinstance(i, (str, int, float, bool, type(None))) for i in data):
+        return dumper.represent_sequence("tag:yaml.org,2002:seq", data)
+
+    return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
+
+
+def yaml_dump(obj, order=None, stream=None, **kwargs):
+
+    if order:
+
+        def _ordering(k):
+            return order.index(k) if k in order else len(order)
+
+        obj = {k: v for k, v in sorted(obj.items(), key=lambda item: _ordering(item[0]))}
+
+    yaml = ruamel.yaml.YAML()
+    yaml.width = 120  # wrap long flow sequences
+
+    yaml.Representer.add_representer(datetime.date, represent_date)
+    yaml.Representer.add_representer(datetime.datetime, represent_date)
+    yaml.Representer.add_representer(str, represent_multiline_str)
+    yaml.Representer.add_representer(list, represent_inline_list)
+
+    data = ruamel.yaml.comments.CommentedMap()
+    for i, (k, v) in enumerate(obj.items()):
+        data[k] = v
+        if i > 0:
+            data.yaml_set_comment_before_after_key(key=k, before="\n")
+
+    if stream:
+        yaml.dump(data, stream=stream, **kwargs)
+
+    stream = io.StringIO()
+    yaml.dump(data, stream=stream, **kwargs)
+    return stream.getvalue()