anemoi-datasets 0.5.7__py3-none-any.whl → 0.5.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. anemoi/datasets/__init__.py +11 -3
  2. anemoi/datasets/__main__.py +2 -3
  3. anemoi/datasets/_version.py +2 -2
  4. anemoi/datasets/commands/__init__.py +2 -3
  5. anemoi/datasets/commands/cleanup.py +9 -0
  6. anemoi/datasets/commands/compare.py +3 -3
  7. anemoi/datasets/commands/copy.py +38 -68
  8. anemoi/datasets/commands/create.py +20 -5
  9. anemoi/datasets/commands/finalise-additions.py +9 -0
  10. anemoi/datasets/commands/finalise.py +9 -0
  11. anemoi/datasets/commands/init-additions.py +9 -0
  12. anemoi/datasets/commands/init.py +9 -0
  13. anemoi/datasets/commands/inspect.py +3 -1
  14. anemoi/datasets/commands/load-additions.py +9 -0
  15. anemoi/datasets/commands/load.py +9 -0
  16. anemoi/datasets/commands/patch.py +9 -0
  17. anemoi/datasets/commands/publish.py +9 -0
  18. anemoi/datasets/commands/scan.py +9 -0
  19. anemoi/datasets/compute/__init__.py +8 -0
  20. anemoi/datasets/compute/recentre.py +3 -2
  21. anemoi/datasets/create/__init__.py +62 -12
  22. anemoi/datasets/create/check.py +4 -3
  23. anemoi/datasets/create/chunks.py +3 -2
  24. anemoi/datasets/create/config.py +5 -5
  25. anemoi/datasets/create/functions/__init__.py +22 -7
  26. anemoi/datasets/create/functions/filters/__init__.py +2 -1
  27. anemoi/datasets/create/functions/filters/empty.py +3 -2
  28. anemoi/datasets/create/functions/filters/noop.py +2 -2
  29. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -2
  30. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -2
  31. anemoi/datasets/create/functions/filters/rename.py +16 -11
  32. anemoi/datasets/create/functions/filters/rotate_winds.py +3 -2
  33. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +3 -2
  34. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +3 -2
  35. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +2 -2
  36. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +2 -2
  37. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +3 -2
  38. anemoi/datasets/create/functions/filters/unrotate_winds.py +3 -2
  39. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +3 -2
  40. anemoi/datasets/create/functions/sources/__init__.py +2 -2
  41. anemoi/datasets/create/functions/sources/accumulations.py +10 -4
  42. anemoi/datasets/create/functions/sources/constants.py +3 -2
  43. anemoi/datasets/create/functions/sources/empty.py +3 -2
  44. anemoi/datasets/create/functions/sources/forcings.py +3 -2
  45. anemoi/datasets/create/functions/sources/grib.py +8 -2
  46. anemoi/datasets/create/functions/sources/hindcasts.py +3 -2
  47. anemoi/datasets/create/functions/sources/mars.py +97 -17
  48. anemoi/datasets/create/functions/sources/netcdf.py +3 -2
  49. anemoi/datasets/create/functions/sources/opendap.py +2 -2
  50. anemoi/datasets/create/functions/sources/recentre.py +3 -2
  51. anemoi/datasets/create/functions/sources/source.py +3 -2
  52. anemoi/datasets/create/functions/sources/tendencies.py +3 -2
  53. anemoi/datasets/create/functions/sources/xarray/__init__.py +8 -3
  54. anemoi/datasets/create/functions/sources/xarray/coordinates.py +3 -2
  55. anemoi/datasets/create/functions/sources/xarray/field.py +6 -5
  56. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +12 -4
  57. anemoi/datasets/create/functions/sources/xarray/flavour.py +2 -2
  58. anemoi/datasets/create/functions/sources/xarray/grid.py +2 -2
  59. anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -2
  60. anemoi/datasets/create/functions/sources/xarray/time.py +2 -2
  61. anemoi/datasets/create/functions/sources/xarray/variable.py +6 -9
  62. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +2 -2
  63. anemoi/datasets/create/functions/sources/xarray_zarr.py +2 -2
  64. anemoi/datasets/create/functions/sources/zenodo.py +2 -2
  65. anemoi/datasets/create/input/__init__.py +3 -17
  66. anemoi/datasets/create/input/action.py +3 -8
  67. anemoi/datasets/create/input/concat.py +3 -2
  68. anemoi/datasets/create/input/context.py +3 -8
  69. anemoi/datasets/create/input/data_sources.py +3 -9
  70. anemoi/datasets/create/input/empty.py +3 -9
  71. anemoi/datasets/create/input/filter.py +3 -9
  72. anemoi/datasets/create/input/function.py +3 -9
  73. anemoi/datasets/create/input/join.py +3 -2
  74. anemoi/datasets/create/input/misc.py +3 -8
  75. anemoi/datasets/create/input/pipe.py +9 -3
  76. anemoi/datasets/create/input/repeated_dates.py +14 -8
  77. anemoi/datasets/create/input/result.py +154 -12
  78. anemoi/datasets/create/input/step.py +4 -9
  79. anemoi/datasets/create/input/template.py +3 -2
  80. anemoi/datasets/create/input/trace.py +3 -2
  81. anemoi/datasets/create/patch.py +9 -1
  82. anemoi/datasets/create/persistent.py +3 -2
  83. anemoi/datasets/create/size.py +3 -2
  84. anemoi/datasets/create/statistics/__init__.py +3 -2
  85. anemoi/datasets/create/statistics/summary.py +3 -2
  86. anemoi/datasets/create/utils.py +15 -2
  87. anemoi/datasets/create/writer.py +3 -2
  88. anemoi/datasets/create/zarr.py +3 -2
  89. anemoi/datasets/data/__init__.py +27 -1
  90. anemoi/datasets/data/concat.py +5 -1
  91. anemoi/datasets/data/dataset.py +216 -37
  92. anemoi/datasets/data/debug.py +4 -1
  93. anemoi/datasets/data/ensemble.py +4 -1
  94. anemoi/datasets/data/fill_missing.py +165 -0
  95. anemoi/datasets/data/forwards.py +23 -1
  96. anemoi/datasets/data/grids.py +236 -58
  97. anemoi/datasets/data/indexing.py +4 -1
  98. anemoi/datasets/data/interpolate.py +4 -1
  99. anemoi/datasets/data/join.py +12 -9
  100. anemoi/datasets/data/masked.py +36 -10
  101. anemoi/datasets/data/merge.py +180 -0
  102. anemoi/datasets/data/misc.py +18 -3
  103. anemoi/datasets/data/missing.py +4 -1
  104. anemoi/datasets/data/rescale.py +4 -1
  105. anemoi/datasets/data/select.py +4 -1
  106. anemoi/datasets/data/statistics.py +4 -1
  107. anemoi/datasets/data/stores.py +66 -3
  108. anemoi/datasets/data/subset.py +6 -1
  109. anemoi/datasets/data/unchecked.py +4 -1
  110. anemoi/datasets/data/xy.py +20 -5
  111. anemoi/datasets/dates/__init__.py +9 -7
  112. anemoi/datasets/dates/groups.py +4 -2
  113. anemoi/datasets/grids.py +86 -2
  114. anemoi/datasets/testing.py +3 -2
  115. anemoi/datasets/utils/__init__.py +8 -0
  116. anemoi/datasets/utils/fields.py +2 -2
  117. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/METADATA +11 -29
  118. anemoi_datasets-0.5.11.dist-info/RECORD +123 -0
  119. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/WHEEL +1 -1
  120. anemoi/datasets/fields.py +0 -66
  121. anemoi_datasets-0.5.7.dist-info/RECORD +0 -122
  122. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/LICENSE +0 -0
  123. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/entry_points.txt +0 -0
  124. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
@@ -25,7 +27,31 @@ class MissingDateError(Exception):
25
27
  pass
26
28
 
27
29
 
30
+ def _convert(x):
31
+
32
+ if isinstance(x, list):
33
+ return [_convert(a) for a in x]
34
+
35
+ if isinstance(x, tuple):
36
+ return tuple(_convert(a) for a in x)
37
+
38
+ if isinstance(x, dict):
39
+ return {k: _convert(v) for k, v in x.items()}
40
+
41
+ if x.__class__.__name__ in ("DictConfig", "ListConfig"):
42
+ from omegaconf import OmegaConf
43
+
44
+ return OmegaConf.to_container(x, resolve=True)
45
+
46
+ return x
47
+
48
+
28
49
  def open_dataset(*args, **kwargs):
50
+
51
+ # That will get rid of OmegaConf objects
52
+
53
+ args, kwargs = _convert(args), _convert(kwargs)
54
+
29
55
  ds = _open_dataset(*args, **kwargs)
30
56
  ds = ds.mutate()
31
57
  ds.arguments = {"args": args, "kwargs": kwargs}
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -148,6 +151,7 @@ def concat_factory(args, kwargs):
148
151
 
149
152
  datasets = kwargs.pop("concat")
150
153
  fill_missing_gaps = kwargs.pop("fill_missing_gaps", False)
154
+
151
155
  assert isinstance(datasets, (list, tuple))
152
156
  assert len(args) == 0
153
157
 
@@ -1,14 +1,16 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import datetime
9
12
  import json
10
13
  import logging
11
- import os
12
14
  import pprint
13
15
  import warnings
14
16
  from functools import cached_property
@@ -20,14 +22,38 @@ from anemoi.utils.dates import frequency_to_timedelta
20
22
  LOG = logging.getLogger(__name__)
21
23
 
22
24
 
25
def _tidy(v):
    """Recursively convert `v` into JSON-serialisable values for metadata."""
    if isinstance(v, (list, tuple, set)):
        return [_tidy(item) for item in v]

    if isinstance(v, dict):
        return {key: _tidy(value) for key, value in v.items()}

    # Dates and times become ISO strings (datetime is a date subclass, so
    # one combined check dispatches to the right isoformat()).
    if isinstance(v, (datetime.datetime, datetime.date)):
        return v.isoformat()

    # Frequencies become their compact string form (e.g. "6h").
    if isinstance(v, datetime.timedelta):
        return frequency_to_string(v)

    if isinstance(v, Dataset):
        # That can happen in the `arguments`
        # if a dataset is passed as an argument
        return repr(v)

    if isinstance(v, slice):
        return (v.start, v.stop, v.step)

    return v
46
+
47
+
23
48
  class Dataset:
24
49
  arguments = {}
50
+ _name = None
25
51
 
26
52
  def mutate(self) -> "Dataset":
27
- """
28
- Give an opportunity to a subclass to return a new Dataset
53
+ """Give an opportunity to a subclass to return a new Dataset
29
54
  object of a different class, if needed.
30
55
  """
56
+
31
57
  return self
32
58
 
33
59
  def swap_with_parent(self, parent):
@@ -38,9 +64,32 @@ class Dataset:
38
64
  return len(self)
39
65
 
40
66
def _subset(self, **kwargs):
    """Apply subsetting options, capturing an optional ``name`` for the result."""
    if not kwargs:
        return self.mutate()

    # `name` is a label for the resulting dataset, not a subsetting option,
    # so it is stripped before delegating to the real subsetting logic.
    label = kwargs.pop("name", None)
    subset = self.__subset(**kwargs)
    subset._name = label

    return subset
76
+
77
+ @property
78
+ def name(self):
79
+ return self._name
80
+
81
+ def __subset(self, **kwargs):
41
82
  if not kwargs:
42
83
  return self.mutate()
43
84
 
85
+ # This one must be first
86
+ if "fill_missing_dates" in kwargs:
87
+ from .fill_missing import fill_missing_dates_factory
88
+
89
+ fill_missing_dates = kwargs.pop("fill_missing_dates")
90
+ ds = fill_missing_dates_factory(self, fill_missing_dates, kwargs)
91
+ return ds._subset(**kwargs).mutate()
92
+
44
93
  if "start" in kwargs or "end" in kwargs:
45
94
  start = kwargs.pop("start", None)
46
95
  end = kwargs.pop("end", None)
@@ -64,12 +113,6 @@ class Dataset:
64
113
  .mutate()
65
114
  )
66
115
 
67
- if "interpolate_frequency" in kwargs:
68
- from .interpolate import InterpolateFrequency
69
-
70
- interpolate_frequency = kwargs.pop("interpolate_frequency")
71
- return InterpolateFrequency(self, interpolate_frequency)._subset(**kwargs).mutate()
72
-
73
116
  if "select" in kwargs:
74
117
  from .select import Select
75
118
 
@@ -121,11 +164,11 @@ class Dataset:
121
164
  bbox = kwargs.pop("area")
122
165
  return Cropping(self, bbox)._subset(**kwargs).mutate()
123
166
 
124
- if "missing_dates" in kwargs:
167
+ if "set_missing_dates" in kwargs:
125
168
  from .missing import MissingDates
126
169
 
127
- missing_dates = kwargs.pop("missing_dates")
128
- return MissingDates(self, missing_dates)._subset(**kwargs).mutate()
170
+ set_missing_dates = kwargs.pop("set_missing_dates")
171
+ return MissingDates(self, set_missing_dates)._subset(**kwargs).mutate()
129
172
 
130
173
  if "skip_missing_dates" in kwargs:
131
174
  from .missing import SkipMissingDates
@@ -139,6 +182,12 @@ class Dataset:
139
182
  if skip_missing_dates:
140
183
  return SkipMissingDates(self, expected_access)._subset(**kwargs).mutate()
141
184
 
185
+ if "interpolate_frequency" in kwargs:
186
+ from .interpolate import InterpolateFrequency
187
+
188
+ interpolate_frequency = kwargs.pop("interpolate_frequency")
189
+ return InterpolateFrequency(self, interpolate_frequency)._subset(**kwargs).mutate()
190
+
142
191
  # Keep last
143
192
  if "shuffle" in kwargs:
144
193
  from .subset import Subset
@@ -222,41 +271,53 @@ class Dataset:
222
271
  shape.pop(drop_axis)
223
272
  return tuple(shape)
224
273
 
274
@property
def typed_variables(self):
    """Variables exposed as anemoi-transform ``Variable`` objects, keyed by name."""
    from anemoi.transform.variables import Variable

    constants = self.constant_fields

    result = {}
    for name, metadata in self.variables_metadata.items():

        # TODO: Once all datasets are updated, we can remove this
        metadata = metadata.copy()
        if name in constants:
            metadata["constant_in_time"] = True

        # Drop the legacy key if present.
        metadata.pop("is_constant_in_time", None)

        result[name] = Variable.from_dict(name, metadata)

    return result
294
+
295
def _input_sources(self):
    """Return the list of input sources collected from the dataset tree."""
    found = []
    self.collect_input_sources(found)
    return found
299
+
225
300
  def metadata(self):
226
301
  import anemoi
227
302
 
228
- def tidy(v):
229
- if isinstance(v, (list, tuple, set)):
230
- return [tidy(i) for i in v]
231
- if isinstance(v, dict):
232
- return {k: tidy(v) for k, v in v.items()}
233
- if isinstance(v, str) and v.startswith("/"):
234
- return os.path.basename(v)
235
- if isinstance(v, datetime.datetime):
236
- return v.isoformat()
237
- if isinstance(v, datetime.date):
238
- return v.isoformat()
239
- if isinstance(v, datetime.timedelta):
240
- return frequency_to_string(v)
241
-
242
- if isinstance(v, Dataset):
243
- # That can happen in the `arguments`
244
- # if a dataset is passed as an argument
245
- return repr(v)
246
-
247
- if isinstance(v, slice):
248
- return (v.start, v.stop, v.step)
249
-
250
- return v
303
+ _, source_to_arrays = self._supporting_arrays_and_sources()
304
+
305
+ sources = []
306
+ for i, source in enumerate(self._input_sources()):
307
+ source_metadata = source.dataset_metadata().copy()
308
+ source_metadata["supporting_arrays"] = source_to_arrays[id(source)]
309
+ sources.append(source_metadata)
251
310
 
252
311
  md = dict(
253
312
  version=anemoi.datasets.__version__,
254
313
  arguments=self.arguments,
255
314
  **self.dataset_metadata(),
315
+ sources=sources,
316
+ supporting_arrays=source_to_arrays[id(self)],
256
317
  )
257
318
 
258
319
  try:
259
- return json.loads(json.dumps(tidy(md)))
320
+ return json.loads(json.dumps(_tidy(md)))
260
321
  except Exception:
261
322
  LOG.exception("Failed to serialize metadata")
262
323
  pprint.pprint(md)
@@ -276,11 +337,72 @@ class Dataset:
276
337
  specific=self.metadata_specific(),
277
338
  frequency=self.frequency,
278
339
  variables=self.variables,
340
+ variables_metadata=self.variables_metadata,
279
341
  shape=self.shape,
342
+ dtype=str(self.dtype),
280
343
  start_date=self.start_date.astype(str),
281
344
  end_date=self.end_date.astype(str),
345
+ name=self.name,
282
346
  )
283
347
 
348
def _supporting_arrays(self, *path):
    """Collect the named supporting arrays of this dataset.

    Returns a dict mapping a '/'-joined path (e.g. "source0/latitudes") to
    a numpy array: always latitudes/longitudes, plus whatever subclasses
    contribute through `collect_supporting_arrays`.
    """
    import numpy as np

    def full_name(prefix, name):
        return "/".join(str(part) for part in [*prefix, name])

    result = {
        full_name(path, "latitudes"): self.latitudes,
        full_name(path, "longitudes"): self.longitudes,
    }

    collected = []
    self.collect_supporting_arrays(collected, *path)

    for prefix, name, array in collected:
        assert isinstance(prefix, tuple) and isinstance(name, str)
        assert isinstance(array, np.ndarray)

        name = full_name(prefix, name)

        if name in result:
            raise ValueError(f"Duplicate key {name}")

        result[name] = array

    return result
375
+
376
def supporting_arrays(self):
    """Arrays to be saved in the checkpoints."""
    arrays, _sources = self._supporting_arrays_and_sources()
    return arrays
380
+
381
def _supporting_arrays_and_sources(self):
    """Return (all supporting arrays, mapping of id(source) -> its array names)."""
    source_to_arrays = {}

    # Arrays owned by this (top-level) dataset.
    arrays = self._supporting_arrays()
    source_to_arrays[id(self)] = sorted(arrays)

    # Arrays contributed by each input source, namespaced by the source's
    # name (or a positional fallback).
    for index, source in enumerate(self._input_sources()):
        label = source.name if source.name is not None else f"source{index}"
        contributed = source._supporting_arrays(label)
        source_to_arrays[id(source)] = sorted(contributed)

        for key in contributed:
            assert key not in arrays

        arrays.update(contributed)

    return arrays, source_to_arrays
401
+
402
+ def collect_supporting_arrays(self, collected, *path):
403
+ # Override this method to add more arrays
404
+ pass
405
+
284
406
  def metadata_specific(self, **kwargs):
285
407
  action = self.__class__.__name__.lower()
286
408
  # assert isinstance(self.frequency, datetime.timedelta), (self.frequency, self, action)
@@ -318,3 +440,60 @@ class Dataset:
318
440
 
319
441
  def get_dataset_names(self, names):
320
442
  raise NotImplementedError(self)
443
+
444
def computed_constant_fields(self):
    """Return the sorted names of constant variables (callers should use `constant_fields`)."""
    try:
        # When statistics tendencies were computed, they give the answer cheaply.
        names = self._compute_constant_fields_from_statistics()
    except KeyError:
        # Tendencies unavailable: fall back to comparing a few data samples.
        names = self._compute_constant_fields_from_a_few_samples()
    return sorted(names)
454
+
455
def _compute_constant_fields_from_a_few_samples(self):
    """Detect constant-in-time variables by comparing a few (non-missing) samples.

    Returns the list of variable names whose values are identical across the
    sampled dates. Samples are spread evenly over the valid indices and always
    include the last one.
    """
    import numpy as np

    # Valid (non-missing) date indices, sorted for a deterministic sample
    # choice (iterating a raw set is not guaranteed to be ordered).
    indices = sorted(set(range(len(self.dates))) - set(self.missing))
    count = len(indices)

    # BUG FIX: with fewer than two valid samples the original
    # `count // (sample_count - 1)` raised ZeroDivisionError.
    if count == 0:
        # Nothing to compare: no variable can be shown to be constant.
        return []
    if count == 1:
        # A single sample: every variable is trivially constant.
        return list(self.variables)

    sample_count = min(4, count)

    # Spread sample positions evenly, always including the last index.
    step = count // (sample_count - 1)
    positions = sorted(set(list(range(0, count, step)) + [count - 1]))
    samples = [indices[i] for i in positions]

    assert set(samples) <= set(indices)  # Make sure we have the samples

    first = self[samples.pop(0)]
    constants = [True] * len(self.variables)

    for sample in samples:
        row = self[sample]
        for i, (a, b) in enumerate(zip(row, first)):
            if np.any(a != b):
                constants[i] = False

    return [v for i, v in enumerate(self.variables) if constants[i]]
489
+
490
def _compute_constant_fields_from_statistics(self):
    """Variables whose tendency mean and stdev are both zero are constant in time.

    Raises KeyError when tendencies have not been computed (handled by the caller).
    """
    tendencies = self.statistics_tendencies()
    means = tendencies["mean"]
    stdevs = tendencies["stdev"]

    return [v for i, v in enumerate(self.variables) if means[i] == 0 and stdevs[i] == 0]
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  import os
10
13
  import textwrap
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
 
10
13
  from .debug import Node
@@ -0,0 +1,165 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ import logging
12
+
13
+ import numpy as np
14
+
15
+ from anemoi.datasets.data import MissingDateError
16
+
17
+ from .debug import Node
18
+ from .debug import debug_indexing
19
+ from .forwards import Forwards
20
+ from .indexing import apply_index_to_slices_changes
21
+ from .indexing import expand_list_indexing
22
+ from .indexing import index_to_slices
23
+ from .indexing import update_tuple
24
+
25
+ LOG = logging.getLogger(__name__)
26
+
27
+
28
class MissingDatesFill(Forwards):
    """Base class for views that transparently fill missing dates.

    Subclasses implement `_fill_missing(n, a, b)`, where `a` and `b` are the
    nearest non-missing indices at-or-below / at-or-above `n` (or None when
    no such index exists).
    """

    def __init__(self, dataset):
        super().__init__(dataset)
        self._missing = set(dataset.missing)
        # Indices already reported, so each missing date is logged only once.
        self._warnings = set()

    @debug_indexing
    @expand_list_indexing
    def _get_tuple(self, index):
        # Normalise the tuple index, fetch everything along the date axis,
        # then re-apply the remaining index components.
        index, changes = index_to_slices(index, self.shape)
        index, previous = update_tuple(index, 0, slice(None))
        result = self._get_slice(previous)
        return apply_index_to_slices_changes(result[index], changes)

    def _get_slice(self, s):
        return np.stack([self[i] for i in range(*s.indices(self._len))])

    @property
    def missing(self):
        # From the caller's point of view, no dates are missing any more.
        return set()

    @debug_indexing
    def __getitem__(self, n):

        try:
            return self.forward[n]
        except MissingDateError:
            pass

        if isinstance(n, tuple):
            return self._get_tuple(n)

        if isinstance(n, slice):
            return self._get_slice(n)

        if n < 0:
            n += self._len

        # Nearest available index at or below n (None if all below are missing).
        a = None
        i = n
        while a is None and i >= 0:
            if i in self._missing:
                i -= 1
            else:
                a = i

        # Nearest available index at or above n (None if all above are missing).
        # BUG FIX: the loop condition previously tested `n < len` (with `n`
        # never changing), so `i` could run past the end of the dataset and
        # `b` could be set to an out-of-range index. Test `i` instead, and
        # avoid shadowing the builtin `len`.
        size = self._len
        b = None
        i = n
        while b is None and i < size:
            if i in self._missing:
                i += 1
            else:
                b = i

        return self._fill_missing(n, a, b)
84
+
85
+
86
class MissingDatesClosest(MissingDatesFill):
    """Fill each missing date with the value of the closest available date."""

    def __init__(self, dataset, closest):
        super().__init__(dataset)
        # Tie-break direction when both neighbours are equidistant: "up" or "down".
        self.closest = closest
        # Cache: missing index -> index of the substitute date.
        self._closest = {}

    def _fill_missing(self, n, a, b):

        if n not in self._warnings:
            LOG.warning(f"Missing date at index {n} ({self.dates[n]})")

            # BUG FIX: guard against absent neighbours (e.g. a missing first
            # or last date); previously `abs(n - a)` raised a TypeError when
            # `a` or `b` was None.
            if a is None and b is None:
                raise MissingDateError(f"Cannot find a date to replace index {n} ({self.dates[n]})")

            if a is None:
                u = b
            elif b is None:
                u = a
            elif abs(n - a) == abs(b - n):
                # Equidistant: honour the configured tie-break direction.
                u = b if self.closest == "up" else a
            else:
                u = a if abs(n - a) < abs(b - n) else b

            LOG.warning(f"Using closest date {u} ({self.dates[u]})")

            self._closest[n] = u
            self._warnings.add(n)

        return self.forward[self._closest[n]]

    def subclass_metadata_specific(self):
        return {"closest": self.closest}

    def tree(self):
        return Node(self, [self.forward.tree()], closest=self.closest)
119
+
120
+
121
class MissingDatesInterpolate(MissingDatesFill):
    """Fill each missing date by linear interpolation between its neighbours."""

    def __init__(self, dataset):
        super().__init__(dataset)
        # Cache of interpolation weights, keyed by missing index.
        self._alpha = {}

    def _fill_missing(self, n, a, b):
        if n not in self._warnings:
            LOG.warning(f"Missing date at index {n} ({self.dates[n]})")

            if a is None or b is None:
                raise MissingDateError(
                    f"Cannot interpolate at index {n} ({self.dates[n]}). Are the first or last date missing?"
                )

            assert a < n < b, (a, n, b)

            # Weight of the upper neighbour; strictly between 0 and 1.
            alpha = (n - a) / (b - a)
            assert 0 < alpha < 1, alpha

            LOG.warning(f"Interpolating between index {a} ({self.dates[a]}) and {b} ({self.dates[b]})")
            LOG.warning(f"Interpolation {1 - alpha:g} * ({self.dates[a]}) + {alpha:g} * ({self.dates[b]})")

            self._alpha[n] = alpha
            self._warnings.add(n)

        weight = self._alpha[n]
        return self.forward[a] * (1 - weight) + self.forward[b] * weight

    def subclass_metadata_specific(self):
        # No subclass-specific settings to record.
        return {}

    def tree(self):
        return Node(self, [self.forward.tree()])
155
+
156
+
157
def fill_missing_dates_factory(dataset, method, kwargs):
    """Wrap `dataset` so that missing dates are filled using `method`.

    `method` is "closest" (use the nearest available date, with an optional
    "closest" tie-break option of "up"/"down") or "interpolate" (linear
    interpolation between neighbours). Method-specific options are consumed
    from `kwargs`.

    Raises ValueError for an unknown method.
    """
    if method == "closest":
        # BUG FIX: use `pop`, not `get` — the caller passes the remaining
        # kwargs on to `_subset(**kwargs)`, where a leftover "closest" key
        # is not a valid subsetting option.
        closest = kwargs.pop("closest", "up")
        return MissingDatesClosest(dataset, closest=closest)

    if method == "interpolate":
        return MissingDatesInterpolate(dataset)

    raise ValueError(f"Invalid `fill_missing_dates` method '{method}'")
@@ -1,11 +1,15 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
12
+ import warnings
9
13
  from functools import cached_property
10
14
 
11
15
  import numpy as np
@@ -31,6 +35,12 @@ class Forwards(Dataset):
31
35
  def __getitem__(self, n):
32
36
  return self.forward[n]
33
37
 
38
@property
def name(self):
    """This dataset's own name, falling back to the wrapped dataset's name."""
    own = self._name
    return own if own is not None else self.forward.name
43
+
34
44
  @property
35
45
  def dates(self):
36
46
  return self.forward.dates
@@ -99,6 +109,12 @@ class Forwards(Dataset):
99
109
  **kwargs,
100
110
  )
101
111
 
112
+ def collect_supporting_arrays(self, collected, *path):
113
+ self.forward.collect_supporting_arrays(collected, *path)
114
+
115
+ def collect_input_sources(self, collected):
116
+ self.forward.collect_input_sources(collected)
117
+
102
118
  def source(self, index):
103
119
  return self.forward.source(index)
104
120
 
@@ -194,6 +210,12 @@ class Combined(Forwards):
194
210
  **kwargs,
195
211
  )
196
212
 
213
def collect_supporting_arrays(self, collected, *path):
    """Gather supporting arrays from each combined dataset, namespaced by its name (or position)."""
    warnings.warn(f"The behaviour of {self.__class__.__name__}.collect_supporting_arrays() is not well defined")
    for position, dataset in enumerate(self.datasets):
        label = dataset.name
        if label is None:
            label = position
        dataset.collect_supporting_arrays(collected, *path, label)
218
+
197
219
  @property
198
220
  def missing(self):
199
221
  raise NotImplementedError("missing() not implemented for Combined")