PyPI - anemoi-datasets - Versions diffs - 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl - Mend

anemoi-datasets 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48):

anemoi/datasets/_version.py +2 -2
anemoi/datasets/commands/cleanup.py +44 -0
anemoi/datasets/commands/create.py +50 -20
anemoi/datasets/commands/finalise-additions.py +45 -0
anemoi/datasets/commands/finalise.py +39 -0
anemoi/datasets/commands/init-additions.py +45 -0
anemoi/datasets/commands/init.py +67 -0
anemoi/datasets/commands/inspect.py +1 -1
anemoi/datasets/commands/load-additions.py +47 -0
anemoi/datasets/commands/load.py +47 -0
anemoi/datasets/commands/patch.py +39 -0
anemoi/datasets/create/__init__.py +961 -146
anemoi/datasets/create/check.py +5 -3
anemoi/datasets/create/config.py +53 -2
anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
anemoi/datasets/create/functions/sources/xarray/field.py +1 -1
anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
anemoi/datasets/create/functions/sources/xarray/metadata.py +27 -29
anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
anemoi/datasets/create/input.py +23 -22
anemoi/datasets/create/statistics/__init__.py +39 -23
anemoi/datasets/create/utils.py +3 -2
anemoi/datasets/data/__init__.py +1 -0
anemoi/datasets/data/concat.py +46 -2
anemoi/datasets/data/dataset.py +109 -34
anemoi/datasets/data/forwards.py +17 -8
anemoi/datasets/data/grids.py +17 -3
anemoi/datasets/data/interpolate.py +133 -0
anemoi/datasets/data/misc.py +56 -66
anemoi/datasets/data/missing.py +240 -0
anemoi/datasets/data/select.py +7 -1
anemoi/datasets/data/stores.py +3 -3
anemoi/datasets/data/subset.py +47 -5
anemoi/datasets/data/unchecked.py +20 -22
anemoi/datasets/data/xy.py +125 -0
anemoi/datasets/dates/__init__.py +13 -66
anemoi/datasets/dates/groups.py +2 -2
anemoi/datasets/grids.py +66 -48
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/METADATA +5 -5
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/RECORD +47 -37
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/WHEEL +1 -1
anemoi/datasets/create/loaders.py +0 -936
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/LICENSE +0 -0
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/entry_points.txt +0 -0
{anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.4.5.dist-info}/top_level.txt +0 -0

anemoi/datasets/create/statistics/__init__.py CHANGED Viewed

@@ -79,6 +79,37 @@ def to_datetimes(dates):
     return [to_datetime(d) for d in dates]
+def fix_variance(x, name, count, sums, squares):
+    assert count.shape == sums.shape == squares.shape
+    assert isinstance(x, float)
+    mean = sums / count
+    assert mean.shape == count.shape
+    if x >= 0:
+        return x
+    LOG.warning(f"Negative variance for {name=}, variance={x}")
+    magnitude = np.sqrt((squares / count + mean * mean) / 2)
+    LOG.warning(f"square / count - mean * mean =  {squares/count} - {mean*mean} = {squares/count - mean*mean}")
+    LOG.warning(f"Variable span order of magnitude is {magnitude}.")
+    LOG.warning(f"Count is {count}.")
+    variances = squares / count - mean * mean
+    assert variances.shape == squares.shape == mean.shape
+    if all(variances >= 0):
+        LOG.warning(f"All individual variances for {name} are positive, setting variance to 0.")
+        return 0
+    # if abs(x) < magnitude * 1e-6 and abs(x) < range * 1e-6:
+    #     LOG.warning("Variance is negative but very small.")
+    #     variances = squares / count - mean * mean
+    #     return 0
+    LOG.warning(f"ERROR at least one individual variance is negative ({np.nanmin(variances)}).")
+    return x
 def check_variance(x, variables_names, minimum, maximum, mean, count, sums, squares):
     if (x >= 0).all():
         return
@@ -292,39 +323,24 @@ class StatAggregator:
     def aggregate(self):
         minimum = np.nanmin(self.minimum, axis=0)
         maximum = np.nanmax(self.maximum, axis=0)
         sums = np.nansum(self.sums, axis=0)
         squares = np.nansum(self.squares, axis=0)
         count = np.nansum(self.count, axis=0)
         has_nans = np.any(self.has_nans, axis=0)
-        mean = sums / count
+        assert sums.shape == count.shape == squares.shape == minimum.shape == maximum.shape
-        assert sums.shape == count.shape == squares.shape == mean.shape == minimum.shape == maximum.shape
+        mean = sums / count
+        assert mean.shape == minimum.shape
         x = squares / count - mean * mean
-        # def fix_variance(x, name, minimum, maximum, mean, count, sums, squares):
-        #     assert x.shape == minimum.shape == maximum.shape == mean.shape == count.shape == sums.shape == squares.shape
-        #     assert x.shape == (1,)
-        #     x, minimum, maximum, mean, count, sums, squares = x[0], minimum[0], maximum[0], mean[0], count[0], sums[0], squares[0]
-        #     if x >= 0:
-        #         return x
-        #
-        #     order = np.sqrt((squares / count + mean * mean)/2)
-        #     range = maximum - minimum
-        #     LOG.warning(f"Negative variance for {name=}, variance={x}")
-        #     LOG.warning(f"square / count - mean * mean =  {squares / count} - {mean * mean} = {squares / count - mean * mean}")
-        #     LOG.warning(f"Variable order of magnitude is {order}.")
-        #     LOG.warning(f"Range is {range} ({maximum=} - {minimum=}).")
-        #     LOG.warning(f"Count is {count}.")
-        #     if abs(x) < order * 1e-6 and abs(x) < range * 1e-6:
-        #         LOG.warning(f"Variance is negative but very small, setting to 0.")
-        #         return x*0
-        #     return x
+        assert x.shape == minimum.shape
         for i, name in enumerate(self.variables_names):
             # remove negative variance due to numerical errors
-            # Not needed for now, fix_variance is disabled
-            # x[i] = fix_variance(x[i:i+1], name, minimum[i:i+1], maximum[i:i+1], mean[i:i+1], count[i:i+1], sums[i:i+1], squares[i:i+1])
+            x[i] = fix_variance(x[i], name, self.count[i : i + 1], self.sums[i : i + 1], self.squares[i : i + 1])
+        for i, name in enumerate(self.variables_names):
             check_variance(
                 x[i : i + 1],
                 [name],

anemoi/datasets/create/utils.py CHANGED Viewed

@@ -7,6 +7,7 @@
 # nor does it submit to any jurisdiction.
 #
+import datetime
 import os
 from contextlib import contextmanager
@@ -61,10 +62,10 @@ def make_list_int(value):
 def normalize_and_check_dates(dates, start, end, frequency, dtype="datetime64[s]"):
-    assert isinstance(frequency, int), frequency
+    assert isinstance(frequency, datetime.timedelta), frequency
     start = np.datetime64(start)
     end = np.datetime64(end)
-    delta = np.timedelta64(frequency, "h")
+    delta = np.timedelta64(frequency)
     res = []
     while start <= end:

anemoi/datasets/data/__init__.py CHANGED Viewed

@@ -27,6 +27,7 @@ class MissingDateError(Exception):
 def open_dataset(*args, **kwargs):
     ds = _open_dataset(*args, **kwargs)
+    ds = ds.mutate()
     ds.arguments = {"args": args, "kwargs": kwargs}
     ds._check()
     return ds

anemoi/datasets/data/concat.py CHANGED Viewed

@@ -9,6 +9,7 @@ import logging
 from functools import cached_property
 import numpy as np
+from anemoi.utils.dates import frequency_to_timedelta
 from .debug import Node
 from .debug import debug_indexing
@@ -102,20 +103,63 @@ class Concat(ConcatMixin, Combined):
     def tree(self):
         return Node(self, [d.tree() for d in self.datasets])
+    @classmethod
+    def check_dataset_compatibility(cls, datasets, fill_missing_gaps=False):
+        # Study the dates
+        ranges = [(d.dates[0].astype(object), d.dates[-1].astype(object)) for d in datasets]
-def concat_factory(args, kwargs, zarr_root):
+        # Make sure the dates are disjoint
+        for i in range(len(ranges)):
+            r = ranges[i]
+            for j in range(i + 1, len(ranges)):
+                s = ranges[j]
+                if r[0] <= s[0] <= r[1] or r[0] <= s[1] <= r[1]:
+                    raise ValueError(f"Overlapping dates: {r} and {s} ({datasets[i]} {datasets[j]})")
+        # For now we should have the datasets in order with no gaps
+        frequency = frequency_to_timedelta(datasets[0].frequency)
+        result = []
+        for i in range(len(ranges) - 1):
+            result.append(datasets[i])
+            r = ranges[i]
+            s = ranges[i + 1]
+            if r[1] + frequency != s[0]:
+                if fill_missing_gaps:
+                    from .missing import MissingDataset
+                    result.append(MissingDataset(datasets[i], r[1] + frequency, s[0] - frequency))
+                else:
+                    r = [str(e) for e in r]
+                    s = [str(e) for e in s]
+                    raise ValueError(
+                        "Datasets must be sorted by dates, with no gaps: "
+                        f"{r} and {s} ({datasets[i]} {datasets[i+1]})"
+                    )
+        result.append(datasets[-1])
+        assert len(result) >= len(datasets), (len(result), len(datasets))
+        return result
+def concat_factory(args, kwargs):
     datasets = kwargs.pop("concat")
+    fill_missing_gaps = kwargs.pop("fill_missing_gaps", False)
     assert isinstance(datasets, (list, tuple))
     assert len(args) == 0
     assert isinstance(datasets, (list, tuple))
-    datasets = [_open(e, zarr_root) for e in datasets]
+    datasets = [_open(e) for e in datasets]
     if len(datasets) == 1:
         return datasets[0]._subset(**kwargs)
     datasets, kwargs = _auto_adjust(datasets, kwargs)
+    datasets = Concat.check_dataset_compatibility(datasets, fill_missing_gaps)
     return Concat(datasets)._subset(**kwargs)

anemoi/datasets/data/dataset.py CHANGED Viewed

@@ -5,24 +5,37 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
+import datetime
+import json
 import logging
 import os
+import pprint
 import warnings
 from functools import cached_property
+from anemoi.utils.dates import frequency_to_seconds
+from anemoi.utils.dates import frequency_to_string
+from anemoi.utils.dates import frequency_to_timedelta
 LOG = logging.getLogger(__name__)
 class Dataset:
     arguments = {}
+    def mutate(self):
+        return self
+    def swap_with_parent(self, parent):
+        return parent
     @cached_property
     def _len(self):
         return len(self)
     def _subset(self, **kwargs):
         if not kwargs:
-            return self
+            return self.mutate()
         if "start" in kwargs or "end" in kwargs:
             start = kwargs.pop("start", None)
@@ -30,37 +43,52 @@ class Dataset:
             from .subset import Subset
-            return Subset(self, self._dates_to_indices(start, end), dict(start=start, end=end))._subset(**kwargs)
+            return (
+                Subset(self, self._dates_to_indices(start, end), dict(start=start, end=end))._subset(**kwargs).mutate()
+            )
         if "frequency" in kwargs:
             from .subset import Subset
+            if "interpolate_frequency" in kwargs:
+                raise ValueError("Cannot use both `frequency` and `interpolate_frequency`")
             frequency = kwargs.pop("frequency")
-            return Subset(self, self._frequency_to_indices(frequency), dict(frequency=frequency))._subset(**kwargs)
+            return (
+                Subset(self, self._frequency_to_indices(frequency), dict(frequency=frequency))
+                ._subset(**kwargs)
+                .mutate()
+            )
+        if "interpolate_frequency" in kwargs:
+            from .interpolate import InterpolateFrequency
+            interpolate_frequency = kwargs.pop("interpolate_frequency")
+            return InterpolateFrequency(self, interpolate_frequency)._subset(**kwargs).mutate()
         if "select" in kwargs:
             from .select import Select
             select = kwargs.pop("select")
-            return Select(self, self._select_to_columns(select), {"select": select})._subset(**kwargs)
+            return Select(self, self._select_to_columns(select), {"select": select})._subset(**kwargs).mutate()
         if "drop" in kwargs:
             from .select import Select
             drop = kwargs.pop("drop")
-            return Select(self, self._drop_to_columns(drop), {"drop": drop})._subset(**kwargs)
+            return Select(self, self._drop_to_columns(drop), {"drop": drop})._subset(**kwargs).mutate()
         if "reorder" in kwargs:
             from .select import Select
             reorder = kwargs.pop("reorder")
-            return Select(self, self._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs)
+            return Select(self, self._reorder_to_columns(reorder), {"reoder": reorder})._subset(**kwargs).mutate()
         if "rename" in kwargs:
             from .select import Rename
             rename = kwargs.pop("rename")
-            return Rename(self, rename)._subset(**kwargs)
+            return Rename(self, rename)._subset(**kwargs).mutate()
         if "statistics" in kwargs:
             from ..data import open_dataset
@@ -68,20 +96,38 @@ class Dataset:
             statistics = kwargs.pop("statistics")
-            return Statistics(self, open_dataset(statistics))._subset(**kwargs)
+            return Statistics(self, open_dataset(statistics))._subset(**kwargs).mutate()
         if "thinning" in kwargs:
             from .masked import Thinning
             thinning = kwargs.pop("thinning")
             method = kwargs.pop("method", "every-nth")
-            return Thinning(self, thinning, method)._subset(**kwargs)
+            return Thinning(self, thinning, method)._subset(**kwargs).mutate()
         if "area" in kwargs:
             from .masked import Cropping
             bbox = kwargs.pop("area")
-            return Cropping(self, bbox)._subset(**kwargs)
+            return Cropping(self, bbox)._subset(**kwargs).mutate()
+        if "missing_dates" in kwargs:
+            from .missing import MissingDates
+            missing_dates = kwargs.pop("missing_dates")
+            return MissingDates(self, missing_dates)._subset(**kwargs).mutate()
+        if "skip_missing_dates" in kwargs:
+            from .missing import SkipMissingDates
+            if "expected_access" not in kwargs:
+                raise ValueError("`expected_access` is required with `skip_missing_dates`")
+            skip_missing_dates = kwargs.pop("skip_missing_dates")
+            expected_access = kwargs.pop("expected_access")
+            if skip_missing_dates:
+                return SkipMissingDates(self, expected_access)._subset(**kwargs).mutate()
         # Keep last
         if "shuffle" in kwargs:
@@ -90,15 +136,14 @@ class Dataset:
             shuffle = kwargs.pop("shuffle")
             if shuffle:
-                return Subset(self, self._shuffle_indices(), dict(shuffle=True))._subset(**kwargs)
+                return Subset(self, self._shuffle_indices(), dict(shuffle=True))._subset(**kwargs).mutate()
         raise NotImplementedError("Unsupported arguments: " + ", ".join(kwargs))
     def _frequency_to_indices(self, frequency):
-        from .misc import _frequency_to_hours
-        requested_frequency = _frequency_to_hours(frequency)
-        dataset_frequency = _frequency_to_hours(self.frequency)
+        requested_frequency = frequency_to_seconds(frequency)
+        dataset_frequency = frequency_to_seconds(self.frequency)
         assert requested_frequency % dataset_frequency == 0
         # Question: where do we start? first date, or first date that is a multiple of the frequency?
         step = requested_frequency // dataset_frequency
@@ -171,37 +216,71 @@ class Dataset:
         import anemoi
         def tidy(v):
-            if isinstance(v, (list, tuple)):
+            if isinstance(v, (list, tuple, set)):
                 return [tidy(i) for i in v]
             if isinstance(v, dict):
                 return {k: tidy(v) for k, v in v.items()}
             if isinstance(v, str) and v.startswith("/"):
                 return os.path.basename(v)
+            if isinstance(v, datetime.datetime):
+                return v.isoformat()
+            if isinstance(v, datetime.date):
+                return v.isoformat()
+            if isinstance(v, datetime.timedelta):
+                return frequency_to_string(v)
+            if isinstance(v, Dataset):
+                # That can happen in the `arguments`
+                # if a dataset is passed as an argument
+                return repr(v)
+            if isinstance(v, slice):
+                return (v.start, v.stop, v.step)
             return v
-        return tidy(
-            dict(
-                version=anemoi.datasets.__version__,
-                shape=self.shape,
-                arguments=self.arguments,
-                specific=self.metadata_specific(),
-                frequency=self.frequency,
-                variables=self.variables,
-                start_date=self.dates[0].astype(str),
-                end_date=self.dates[-1].astype(str),
-            )
+        md = dict(
+            version=anemoi.datasets.__version__,
+            arguments=self.arguments,
+            **self.dataset_metadata(),
+        )
+        try:
+            return json.loads(json.dumps(tidy(md)))
+        except Exception:
+            LOG.exception("Failed to serialize metadata")
+            pprint.pprint(md)
+            raise
+    @property
+    def start_date(self):
+        return self.dates[0]
+    @property
+    def end_date(self):
+        return self.dates[-1]
+    def dataset_metadata(self):
+        return dict(
+            specific=self.metadata_specific(),
+            frequency=self.frequency,
+            variables=self.variables,
+            shape=self.shape,
+            start_date=self.start_date.astype(str),
+            end_date=self.end_date.astype(str),
         )
     def metadata_specific(self, **kwargs):
         action = self.__class__.__name__.lower()
-        assert isinstance(self.frequency, int), (self.frequency, self, action)
+        # assert isinstance(self.frequency, datetime.timedelta), (self.frequency, self, action)
         return dict(
             action=action,
             variables=self.variables,
             shape=self.shape,
-            frequency=self.frequency,
-            start_date=self.dates[0].astype(str),
-            end_date=self.dates[-1].astype(str),
+            frequency=frequency_to_string(frequency_to_timedelta(self.frequency)),
+            start_date=self.start_date.astype(str),
+            end_date=self.end_date.astype(str),
             **kwargs,
         )
@@ -220,10 +299,6 @@ class Dataset:
             if n.startswith("_") and not n.startswith("__"):
                 warnings.warn(f"Private method {n} is overriden in {ds.__class__.__name__}")
-        # for n in ('metadata_specific', 'tree', 'source'):
-        #     if n not in overriden:
-        #         warnings.warn(f"Method {n} is not overriden in {ds.__class__.__name__}")
     def _repr_html_(self):
         return self.tree().html()

anemoi/datasets/data/forwards.py CHANGED Viewed

@@ -23,7 +23,7 @@ LOG = logging.getLogger(__name__)
 class Forwards(Dataset):
     def __init__(self, forward):
-        self.forward = forward
+        self.forward = forward.mutate()
     def __len__(self):
         return len(self.forward)
@@ -118,6 +118,9 @@ class Combined(Forwards):
         # Forward most properties to the first dataset
         super().__init__(datasets[0])
+    def mutate(self):
+        return self
     def check_same_resolution(self, d1, d2):
         if d1.resolution != d2.resolution:
             raise ValueError(f"Incompatible resolutions: {d1.resolution} and {d2.resolution} ({d1} {d2})")
@@ -187,14 +190,9 @@ class Combined(Forwards):
             **kwargs,
         )
-    @cached_property
+    @property
     def missing(self):
-        offset = 0
-        result = set()
-        for d in self.datasets:
-            result.update(offset + m for m in d.missing)
-            offset += len(d)
-        return result
+        raise NotImplementedError("missing() not implemented for Combined")
     def get_dataset_names(self, names):
         for d in self.datasets:
@@ -249,3 +247,14 @@ class GivenAxis(Combined):
             return self._get_slice(n)
         return np.concatenate([d[n] for d in self.datasets], axis=self.axis - 1)
+    @cached_property
+    def missing(self):
+        offset = 0
+        result = set()
+        for d in self.datasets:
+            print("--->", d.missing, d)
+            result.update(offset + m for m in d.missing)
+            if self.axis == 0:  # Advance if axis is time
+                offset += len(d)
+        return result

anemoi/datasets/data/grids.py CHANGED Viewed

@@ -128,7 +128,7 @@ class Grids(GridsBase):
 class Cutout(GridsBase):
-    def __init__(self, datasets, axis):
+    def __init__(self, datasets, axis, min_distance_km=None, cropping_distance=2.0, neighbours=5, plot=False):
         from anemoi.datasets.grids import cutout_mask
         super().__init__(datasets, axis)
@@ -144,7 +144,10 @@ class Cutout(GridsBase):
             self.lam.longitudes,
             self.globe.latitudes,
             self.globe.longitudes,
-            # plot="cutout",
+            plot=plot,
+            min_distance_km=min_distance_km,
+            cropping_distance=cropping_distance,
+            neighbours=neighbours,
         )
         assert len(self.mask) == self.globe.shape[3], (
             len(self.mask),
@@ -229,6 +232,10 @@ def cutout_factory(args, kwargs):
     cutout = kwargs.pop("cutout")
     axis = kwargs.pop("axis", 3)
+    plot = kwargs.pop("plot", None)
+    min_distance_km = kwargs.pop("min_distance_km", None)
+    cropping_distance = kwargs.pop("cropping_distance", 2.0)
+    neighbours = kwargs.pop("neighbours", 5)
     assert len(args) == 0
     assert isinstance(cutout, (list, tuple))
@@ -236,4 +243,11 @@ def cutout_factory(args, kwargs):
     datasets = [_open(e) for e in cutout]
     datasets, kwargs = _auto_adjust(datasets, kwargs)
-    return Cutout(datasets, axis=axis)._subset(**kwargs)
+    return Cutout(
+        datasets,
+        axis=axis,
+        neighbours=neighbours,
+        min_distance_km=min_distance_km,
+        cropping_distance=cropping_distance,
+        plot=plot,
+    )._subset(**kwargs)

anemoi/datasets/data/interpolate.py ADDED Viewed

@@ -0,0 +1,133 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+import logging
+from functools import cached_property
+import numpy as np
+from anemoi.utils.dates import frequency_to_timedelta
+from .debug import Node
+from .debug import debug_indexing
+from .forwards import Forwards
+from .indexing import apply_index_to_slices_changes
+from .indexing import expand_list_indexing
+from .indexing import index_to_slices
+from .indexing import update_tuple
+LOG = logging.getLogger(__name__)
+class InterpolateFrequency(Forwards):
+    def __init__(self, dataset, frequency):
+        super().__init__(dataset)
+        self._frequency = frequency_to_timedelta(frequency)
+        self.seconds = self._frequency.total_seconds()
+        other_seconds = dataset.frequency.total_seconds()
+        self.seconds = int(self.seconds)
+        assert self.seconds == self._frequency.total_seconds()
+        other_seconds = int(other_seconds)
+        assert other_seconds == dataset.frequency.total_seconds()
+        if self.seconds >= other_seconds:
+            raise ValueError(
+                f"Interpolate frequency {self._frequency} must be more frequent than dataset frequency {dataset.frequency}"
+            )
+        if other_seconds % self.seconds != 0:
+            raise ValueError(
+                f"Interpolate frequency {self._frequency}  must be a multiple of the dataset frequency {dataset.frequency}"
+            )
+        self.ratio = other_seconds // self.seconds
+        self.alphas = np.linspace(0, 1, self.ratio + 1)
+        self.other_len = len(dataset)
+    @debug_indexing
+    @expand_list_indexing
+    def _get_tuple(self, index):
+        index, changes = index_to_slices(index, self.shape)
+        index, previous = update_tuple(index, 0, slice(None))
+        result = self._get_slice(previous)
+        return apply_index_to_slices_changes(result[index], changes)
+    def _get_slice(self, s):
+        return np.stack([self[i] for i in range(*s.indices(self._len))])
+    @debug_indexing
+    def __getitem__(self, n):
+        if isinstance(n, tuple):
+            return self._get_tuple(n)
+        if isinstance(n, slice):
+            return self._get_slice(n)
+        if n < 0:
+            n += self._len
+        if n == self._len - 1:
+            # Special case for the last element
+            return self.forward[-1]
+        i = n // self.ratio
+        x = n % self.ratio
+        if x == 0:
+            # No interpolation needed
+            return self.forward[i]
+        alpha = self.alphas[x]
+        assert 0 < alpha < 1, alpha
+        return self.forward[i] * (1 - alpha) + self.forward[i + 1] * alpha
+    def __len__(self):
+        return (self.other_len - 1) * self.ratio + 1
+    @property
+    def frequency(self):
+        return self._frequency
+    @cached_property
+    def dates(self):
+        result = []
+        deltas = [np.timedelta64(self.seconds * i, "s") for i in range(self.ratio)]
+        for d in self.forward.dates[:-1]:
+            for i in deltas:
+                result.append(d + i)
+        result.append(self.forward.dates[-1])
+        return np.array(result)
+    @property
+    def shape(self):
+        return (self._len,) + self.forward.shape[1:]
+    def tree(self):
+        return Node(self, [self.forward.tree()], frequency=self.frequency)
+    @cached_property
+    def missing(self):
+        result = []
+        j = 0
+        for i in range(self.other_len):
+            missing = i in self.forward.missing
+            for _ in range(self.ratio):
+                if missing:
+                    result.append(j)
+                j += 1
+        result = set(x for x in result if x < self._len)
+        return result
+    def subclass_metadata_specific(self):
+        return {
+            # "frequency": frequency_to_string(self._frequency),
+        }

anemoi-datasets 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

anemoi-datasets 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl