PyPI - anemoi-datasets - Versions diffs - 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl - Mend

anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

anemoi/datasets/__init__.py +1 -2
anemoi/datasets/_version.py +16 -3
anemoi/datasets/commands/check.py +1 -1
anemoi/datasets/commands/copy.py +1 -2
anemoi/datasets/commands/create.py +1 -1
anemoi/datasets/commands/inspect.py +27 -35
anemoi/datasets/commands/recipe/__init__.py +93 -0
anemoi/datasets/commands/recipe/format.py +55 -0
anemoi/datasets/commands/recipe/migrate.py +555 -0
anemoi/datasets/commands/validate.py +59 -0
anemoi/datasets/compute/recentre.py +3 -6
anemoi/datasets/create/__init__.py +64 -26
anemoi/datasets/create/check.py +10 -12
anemoi/datasets/create/chunks.py +1 -2
anemoi/datasets/create/config.py +5 -6
anemoi/datasets/create/input/__init__.py +44 -65
anemoi/datasets/create/input/action.py +296 -238
anemoi/datasets/create/input/context/__init__.py +71 -0
anemoi/datasets/create/input/context/field.py +54 -0
anemoi/datasets/create/input/data_sources.py +7 -9
anemoi/datasets/create/input/misc.py +2 -75
anemoi/datasets/create/input/repeated_dates.py +11 -130
anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
anemoi/datasets/create/input/trace.py +1 -1
anemoi/datasets/create/patch.py +1 -2
anemoi/datasets/create/persistent.py +3 -5
anemoi/datasets/create/size.py +1 -3
anemoi/datasets/create/sources/accumulations.py +120 -145
anemoi/datasets/create/sources/accumulations2.py +20 -53
anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
anemoi/datasets/create/sources/constants.py +39 -40
anemoi/datasets/create/sources/empty.py +22 -19
anemoi/datasets/create/sources/fdb.py +133 -0
anemoi/datasets/create/sources/forcings.py +29 -29
anemoi/datasets/create/sources/grib.py +94 -78
anemoi/datasets/create/sources/grib_index.py +57 -55
anemoi/datasets/create/sources/hindcasts.py +57 -59
anemoi/datasets/create/sources/legacy.py +10 -62
anemoi/datasets/create/sources/mars.py +121 -149
anemoi/datasets/create/sources/netcdf.py +28 -25
anemoi/datasets/create/sources/opendap.py +28 -26
anemoi/datasets/create/sources/patterns.py +4 -6
anemoi/datasets/create/sources/recentre.py +46 -48
anemoi/datasets/create/sources/repeated_dates.py +44 -0
anemoi/datasets/create/sources/source.py +26 -51
anemoi/datasets/create/sources/tendencies.py +68 -98
anemoi/datasets/create/sources/xarray.py +4 -6
anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
anemoi/datasets/create/sources/xarray_support/field.py +20 -16
anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
anemoi/datasets/create/sources/xarray_support/time.py +10 -13
anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
anemoi/datasets/create/sources/xarray_zarr.py +28 -25
anemoi/datasets/create/sources/zenodo.py +43 -41
anemoi/datasets/create/statistics/__init__.py +3 -6
anemoi/datasets/create/testing.py +4 -0
anemoi/datasets/create/typing.py +1 -2
anemoi/datasets/create/utils.py +0 -43
anemoi/datasets/create/zarr.py +7 -2
anemoi/datasets/data/__init__.py +15 -6
anemoi/datasets/data/complement.py +7 -12
anemoi/datasets/data/concat.py +5 -8
anemoi/datasets/data/dataset.py +48 -47
anemoi/datasets/data/debug.py +7 -9
anemoi/datasets/data/ensemble.py +4 -6
anemoi/datasets/data/fill_missing.py +7 -10
anemoi/datasets/data/forwards.py +22 -26
anemoi/datasets/data/grids.py +12 -168
anemoi/datasets/data/indexing.py +9 -12
anemoi/datasets/data/interpolate.py +7 -15
anemoi/datasets/data/join.py +8 -12
anemoi/datasets/data/masked.py +6 -11
anemoi/datasets/data/merge.py +5 -9
anemoi/datasets/data/misc.py +41 -45
anemoi/datasets/data/missing.py +11 -16
anemoi/datasets/data/observations/__init__.py +8 -14
anemoi/datasets/data/padded.py +3 -5
anemoi/datasets/data/records/backends/__init__.py +2 -2
anemoi/datasets/data/rescale.py +5 -12
anemoi/datasets/data/rolling_average.py +141 -0
anemoi/datasets/data/select.py +13 -16
anemoi/datasets/data/statistics.py +4 -7
anemoi/datasets/data/stores.py +22 -29
anemoi/datasets/data/subset.py +8 -11
anemoi/datasets/data/unchecked.py +7 -11
anemoi/datasets/data/xy.py +25 -21
anemoi/datasets/dates/__init__.py +15 -18
anemoi/datasets/dates/groups.py +7 -10
anemoi/datasets/dumper.py +76 -0
anemoi/datasets/grids.py +4 -185
anemoi/datasets/schemas/recipe.json +131 -0
anemoi/datasets/testing.py +93 -7
anemoi/datasets/validate.py +598 -0
{anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
anemoi/datasets/create/filter.py +0 -48
anemoi/datasets/create/input/concat.py +0 -164
anemoi/datasets/create/input/context.py +0 -89
anemoi/datasets/create/input/empty.py +0 -54
anemoi/datasets/create/input/filter.py +0 -118
anemoi/datasets/create/input/function.py +0 -233
anemoi/datasets/create/input/join.py +0 -130
anemoi/datasets/create/input/pipe.py +0 -66
anemoi/datasets/create/input/step.py +0 -177
anemoi/datasets/create/input/template.py +0 -162
anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
{anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
{anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
{anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
{anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0

anemoi/datasets/create/sources/repeated_dates.py ADDED Viewed

@@ -0,0 +1,44 @@
+# (C) Copyright 2024 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+import logging
+from typing import Any
+from anemoi.transform.fields import new_field_with_valid_datetime
+from anemoi.transform.fields import new_fieldlist_from_list
+from anemoi.datasets.create.input.repeated_dates import DateMapper
+from anemoi.datasets.create.source import Source
+from anemoi.datasets.create.sources import source_registry
+LOG = logging.getLogger(__name__)
+@source_registry.register("repeated_dates")
+class RepeatedDatesSource(Source):
+    def __init__(self, context, source: Any, mode: str, **kwargs) -> None:
+        # assert False, (context, source, mode, kwargs)
+        super().__init__(context, **kwargs)
+        self.mapper = DateMapper.from_mode(mode, source, kwargs)
+        self.source = source
+    def execute(self, group_of_dates):
+        source = self.context.create_source(self.source, "data_sources", str(id(self)))
+        result = []
+        for one_date_group, many_dates_group in self.mapper.transform(group_of_dates):
+            print(f"one_date_group: {one_date_group}, many_dates_group: {many_dates_group}")
+            source_results = source(self.context, one_date_group)
+            for field in source_results:
+                for date in many_dates_group:
+                    result.append(new_field_with_valid_datetime(field, date))
+        return new_fieldlist_from_list(result)

anemoi/datasets/create/sources/source.py CHANGED Viewed

@@ -9,64 +9,39 @@
 from datetime import datetime
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
 from earthkit.data import from_source
-from anemoi.datasets.create.utils import to_datetime_list
+from anemoi.datasets.create.sources import source_registry
-from .legacy import legacy_source
+from .legacy import LegacySource
-@legacy_source(__file__)
-def source(context: Optional[Any], dates: List[datetime], **kwargs: Any) -> Any:
-    """Generates a source based on the provided context, dates, and additional keyword arguments.
+@source_registry.register("source")
+class GenericSource(LegacySource):
-    Parameters
-    ----------
-    context : Optional[Any]
-        The context in which the source is generated.
-    dates : List[datetime]
-        A list of datetime objects representing the dates.
-    **kwargs : Any
-        Additional keyword arguments for the source generation.
+    @staticmethod
+    def _execute(context: Any | None, dates: list[datetime], **kwargs: Any) -> Any:
+        """Generates a source based on the provided context, dates, and additional keyword arguments.
-    Returns
-    -------
-    Any
-        The generated source.
-    """
-    name = kwargs.pop("name")
-    context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
-    if kwargs["date"] == "$from_dates":
-        kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates})
-    if kwargs["time"] == "$from_dates":
-        kwargs["time"] = list({d.strftime("%H%M") for d in dates})
-    return from_source(name, **kwargs)
+        Parameters
+        ----------
+        context : Optional[Any]
+            The context in which the source is generated.
+        dates : List[datetime]
+            A list of datetime objects representing the dates.
+        **kwargs : Any
+            Additional keyword arguments for the source generation.
-execute = source
-if __name__ == "__main__":
-    import yaml
-    config: Dict[str, Any] = yaml.safe_load(
+        Returns
+        -------
+        Any
+            The generated source.
         """
-      name: mars
-      class: ea
-      expver: '0001'
-      grid: 20.0/20.0
-      levtype: sfc
-      param: [2t]
-      number: [0, 1]
-      date: $from_dates
-      time: $from_dates
-    """
-    )
-    dates: List[str] = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
-    dates = to_datetime_list(dates)
-    for f in source(None, dates, **config):
-        print(f, f.to_numpy().mean())
+        name = kwargs.pop("name")
+        context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
+        if kwargs["date"] == "$from_dates":
+            kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates})
+        if kwargs["time"] == "$from_dates":
+            kwargs["time"] = list({d.strftime("%H%M") for d in dates})
+        return from_source(name, **kwargs)

anemoi/datasets/create/sources/tendencies.py CHANGED Viewed

@@ -10,16 +10,13 @@
 import datetime
 from collections import defaultdict
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Tuple
 from earthkit.data.core.temporary import temp_file
 from earthkit.data.readers.grib.output import new_grib_output
-from anemoi.datasets.create.utils import to_datetime_list
+from anemoi.datasets.create.sources import source_registry
-from .legacy import legacy_source
+from .legacy import LegacySource
 def _date_to_datetime(d: Any) -> Any:
@@ -63,7 +60,7 @@ def normalise_time_delta(t: Any) -> datetime.timedelta:
     return t
-def group_by_field(ds: Any) -> Dict[Tuple, List[Any]]:
+def group_by_field(ds: Any) -> dict[tuple, list[Any]]:
     """Groups fields by their metadata excluding 'date', 'time', and 'step'.
     Parameters
@@ -86,116 +83,89 @@ def group_by_field(ds: Any) -> Dict[Tuple, List[Any]]:
     return d
-@legacy_source(__file__)
-def tendencies(dates: List[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
-    """Computes tendencies for the given dates and time increment.
+@source_registry.register("tendencies")
+class TendenciesSource(LegacySource):
-    Parameters
-    ----------
-    dates : List[datetime.datetime]
-        A list of datetime objects.
-    time_increment : Any
-        A time increment string ending with 'h' or a datetime.timedelta object.
-    **kwargs : Any
-        Additional keyword arguments.
-    Returns
-    -------
-    Any
-        A dataset object with computed tendencies.
-    """
-    print("✅", kwargs)
-    time_increment = normalise_time_delta(time_increment)
-    shifted_dates = [d - time_increment for d in dates]
-    all_dates = sorted(list(set(dates + shifted_dates)))
+    @staticmethod
+    def _execute(dates: list[datetime.datetime], time_increment: Any, **kwargs: Any) -> Any:
+        """Computes tendencies for the given dates and time increment.
-    # from .mars import execute as mars
-    from anemoi.datasets.create.mars import execute as mars
+        Parameters
+        ----------
+        dates : List[datetime.datetime]
+            A list of datetime objects.
+        time_increment : Any
+            A time increment string ending with 'h' or a datetime.timedelta object.
+        **kwargs : Any
+            Additional keyword arguments.
-    ds = mars(dates=all_dates, **kwargs)
-    dates_in_data = ds.unique_values("valid_datetime", progress_bar=False)["valid_datetime"]
-    for d in all_dates:
-        assert d.isoformat() in dates_in_data, d
-    ds1 = ds.sel(valid_datetime=[d.isoformat() for d in dates])
-    ds2 = ds.sel(valid_datetime=[d.isoformat() for d in shifted_dates])
-    assert len(ds1) == len(ds2), (len(ds1), len(ds2))
-    group1 = group_by_field(ds1)
-    group2 = group_by_field(ds2)
+        Returns
+        -------
+        Any
+            A dataset object with computed tendencies.
+        """
+        print("✅", kwargs)
+        time_increment = normalise_time_delta(time_increment)
-    assert group1.keys() == group2.keys(), (group1.keys(), group2.keys())
+        shifted_dates = [d - time_increment for d in dates]
+        all_dates = sorted(list(set(dates + shifted_dates)))
-    # prepare output tmp file so we can read it back
-    tmp = temp_file()
-    path = tmp.path
-    out = new_grib_output(path)
+        from .mars import mars
-    for k in group1:
-        assert len(group1[k]) == len(group2[k]), k
-        print()
-        print("❌", k)
+        ds = mars(dates=all_dates, **kwargs)
-        for field, b_field in zip(group1[k], group2[k]):
-            for k in ["param", "level", "number", "grid", "shape"]:
-                assert field.metadata(k) == b_field.metadata(k), (
-                    k,
-                    field.metadata(k),
-                    b_field.metadata(k),
-                )
+        dates_in_data = ds.unique_values("valid_datetime", progress_bar=False)["valid_datetime"]
+        for d in all_dates:
+            assert d.isoformat() in dates_in_data, d
-            c = field.to_numpy()
-            b = b_field.to_numpy()
-            assert c.shape == b.shape, (c.shape, b.shape)
+        ds1 = ds.sel(valid_datetime=[d.isoformat() for d in dates])
+        ds2 = ds.sel(valid_datetime=[d.isoformat() for d in shifted_dates])
-            ################
-            # Actual computation happens here
-            x = c - b
-            ################
+        assert len(ds1) == len(ds2), (len(ds1), len(ds2))
-            assert x.shape == c.shape, c.shape
-            print(f"Computing data for {field.metadata('valid_datetime')}={field}-{b_field}")
-            out.write(x, template=field)
+        group1 = group_by_field(ds1)
+        group2 = group_by_field(ds2)
-    out.close()
+        assert group1.keys() == group2.keys(), (group1.keys(), group2.keys())
-    from earthkit.data import from_source
+        # prepare output tmp file so we can read it back
+        tmp = temp_file()
+        path = tmp.path
+        out = new_grib_output(path)
-    ds = from_source("file", path)
-    # save a reference to the tmp file so it is deleted
-    # only when the dataset is not used anymore
-    ds._tmp = tmp
+        for k in group1:
+            assert len(group1[k]) == len(group2[k]), k
+            print()
+            print("❌", k)
-    return ds
+            for field, b_field in zip(group1[k], group2[k]):
+                for k in ["param", "level", "number", "grid", "shape"]:
+                    assert field.metadata(k) == b_field.metadata(k), (
+                        k,
+                        field.metadata(k),
+                        b_field.metadata(k),
+                    )
+                c = field.to_numpy()
+                b = b_field.to_numpy()
+                assert c.shape == b.shape, (c.shape, b.shape)
-execute = tendencies
+                ################
+                # Actual computation happens here
+                x = c - b
+                ################
-if __name__ == "__main__":
-    import yaml
+                assert x.shape == c.shape, c.shape
+                print(f"Computing data for {field.metadata('valid_datetime')}={field}-{b_field}")
+                out.write(x, template=field)
-    config = yaml.safe_load(
-        """
+        out.close()
-    config:
-      time_increment: 12h
-      database: marser
-      class: ea
-      # date: computed automatically
-      # time: computed automatically
-      expver: "0001"
-      grid: 20.0/20.0
-      levtype: sfc
-      param: [2t]
-    """
-    )["config"]
+        from earthkit.data import from_source
-    dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
-    dates = to_datetime_list(dates)
+        ds = from_source("file", path)
+        # save a reference to the tmp file so it is deleted
+        # only when the dataset is not used anymore
+        ds._tmp = tmp
-    DEBUG = True
-    for f in tendencies(dates, **config):
-        print(f, f.to_numpy().mean())
+        return ds

anemoi/datasets/create/sources/xarray.py CHANGED Viewed

@@ -8,8 +8,6 @@
 # nor does it submit to any jurisdiction.
 from typing import Any
-from typing import Dict
-from typing import Optional
 import earthkit.data as ekd
@@ -28,11 +26,11 @@ class XarraySourceBase(Source):
     emoji = "✖️"  # For tracing
-    options: Optional[Dict[str, Any]] = None
-    flavour: Optional[Dict[str, Any]] = None
-    patch: Optional[Dict[str, Any]] = None
+    options: dict[str, Any] | None = None
+    flavour: dict[str, Any] | None = None
+    patch: dict[str, Any] | None = None
-    path_or_url: Optional[str] = None
+    path_or_url: str | None = None
     def __init__(self, context: Any, path: str = None, url: str = None, *args: Any, **kwargs: Any):
         """Initialise the source.

anemoi/datasets/create/sources/xarray_support/__init__.py CHANGED Viewed

@@ -10,10 +10,6 @@
 import datetime
 import logging
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Union
 import earthkit.data as ekd
 import xarray as xr
@@ -21,13 +17,14 @@ from earthkit.data.core.fieldlist import MultiFieldList
 from anemoi.datasets.create.sources.patterns import iterate_patterns
-from ..legacy import legacy_source
+from .. import source_registry
+from ..legacy import LegacySource
 from .fieldlist import XarrayFieldList
 LOG = logging.getLogger(__name__)
-def check(what: str, ds: xr.Dataset, paths: List[str], **kwargs: Any) -> None:
+def check(what: str, ds: xr.Dataset, paths: list[str], **kwargs: Any) -> None:
     """Checks if the dataset has the expected number of fields.
     Parameters
@@ -53,12 +50,12 @@ def check(what: str, ds: xr.Dataset, paths: List[str], **kwargs: Any) -> None:
 def load_one(
     emoji: str,
     context: Any,
-    dates: List[str],
-    dataset: Union[str, xr.Dataset],
+    dates: list[str],
+    dataset: str | xr.Dataset,
     *,
-    options: Optional[Dict[str, Any]] = None,
-    flavour: Optional[str] = None,
-    patch: Optional[Any] = None,
+    options: dict[str, Any] | None = None,
+    flavour: str | None = None,
+    patch: Any | None = None,
     **kwargs: Any,
 ) -> ekd.FieldList:
     """Loads a single dataset.
@@ -97,7 +94,10 @@ def load_one(
         # If the dataset is a zarr store, we need to use the zarr engine
         options["engine"] = "zarr"
-    data = xr.open_dataset(dataset, **options)
+    if isinstance(dataset, xr.Dataset):
+        data = dataset
+    else:
+        data = xr.open_dataset(dataset, **options)
     fs = XarrayFieldList.from_xarray(data, flavour=flavour, patch=patch)
@@ -124,7 +124,7 @@ def load_one(
     return result
-def load_many(emoji: str, context: Any, dates: List[datetime.datetime], pattern: str, **kwargs: Any) -> ekd.FieldList:
+def load_many(emoji: str, context: Any, dates: list[datetime.datetime], pattern: str, **kwargs: Any) -> ekd.FieldList:
     """Loads multiple datasets.
     Parameters
@@ -153,26 +153,30 @@ def load_many(emoji: str, context: Any, dates: List[datetime.datetime], pattern:
     return MultiFieldList(result)
-@legacy_source("xarray")
-def execute(context: Any, dates: List[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
-    """Executes the loading of datasets.
-    Parameters
-    ----------
-    context : Any
-        Context object.
-    dates : List[str]
-        List of dates.
-    url : str
-        URL pattern for loading datasets.
-    *args : Any
-        Additional arguments.
-    **kwargs : Any
-        Additional keyword arguments.
-    Returns
-    -------
-    ekd.FieldList
-        The loaded datasets.
-    """
-    return load_many("🌐", context, dates, url, *args, **kwargs)
+@source_registry.register("xarray")
+class LegacyXarraySource(LegacySource):
+    name = "xarray"
+    @staticmethod
+    def _execute(context: Any, dates: list[str], url: str, *args: Any, **kwargs: Any) -> ekd.FieldList:
+        """Executes the loading of datasets.
+        Parameters
+        ----------
+        context : Any
+            Context object.
+        dates : List[str]
+            List of dates.
+        url : str
+            URL pattern for loading datasets.
+        *args : Any
+            Additional arguments.
+        **kwargs : Any
+            Additional keyword arguments.
+        Returns
+        -------
+        ekd.FieldList
+            The loaded datasets.
+        """
+        return load_many("🌐", context, dates, url, *args, **kwargs)

anemoi/datasets/create/sources/xarray_support/coordinates.py CHANGED Viewed

@@ -13,10 +13,6 @@ from __future__ import annotations
 import datetime
 import logging
 from typing import Any
-from typing import Dict
-from typing import Optional
-from typing import Tuple
-from typing import Union
 import numpy as np
 import xarray as xr
@@ -107,7 +103,7 @@ class Coordinate:
         """
         self.variable = variable
         self.scalar = is_scalar(variable)
-        self.kwargs: Dict[str, Any] = {}  # Used when creating a new coordinate (reduced method)
+        self.kwargs: dict[str, Any] = {}  # Used when creating a new coordinate (reduced method)
     def __len__(self) -> int:
         """Get the length of the coordinate.
@@ -127,7 +123,7 @@ class Coordinate:
         str
             The string representation of the coordinate.
         """
-        return "%s[name=%s,values=%s,shape=%s]" % (
+        return "{}[name={},values={},shape={}]".format(
             self.__class__.__name__,
             self.variable.name,
             self.variable.values if self.scalar else len(self),
@@ -152,7 +148,7 @@ class Coordinate:
             **self.kwargs,
         )
-    def index(self, value: Union[Any, list, tuple]) -> Optional[Union[int, list]]:
+    def index(self, value: Any | list | tuple) -> int | list | None:
         """Return the index of the value in the coordinate.
         Parameters
@@ -172,7 +168,7 @@ class Coordinate:
                 return self._index_multiple(value)
         return self._index_single(value)
-    def _index_single(self, value: Any) -> Optional[int]:
+    def _index_single(self, value: Any) -> int | None:
         """Return the index of a single value in the coordinate.
         Parameters
@@ -205,7 +201,7 @@ class Coordinate:
         return None
-    def _index_multiple(self, value: list) -> Optional[list]:
+    def _index_multiple(self, value: list) -> list | None:
         """Return the indices of multiple values in the coordinate.
         Parameters
@@ -275,7 +271,7 @@ class TimeCoordinate(Coordinate):
     is_time = True
     mars_names = ("valid_datetime",)
-    def index(self, time: datetime.datetime) -> Optional[int]:
+    def index(self, time: datetime.datetime) -> int | None:
         """Return the index of the time in the coordinate.
         Parameters
@@ -297,7 +293,7 @@ class DateCoordinate(Coordinate):
     is_date = True
     mars_names = ("date",)
-    def index(self, date: datetime.datetime) -> Optional[int]:
+    def index(self, date: datetime.datetime) -> int | None:
         """Return the index of the date in the coordinate.
         Parameters
@@ -436,7 +432,7 @@ class ScalarCoordinate(Coordinate):
     is_grid = False
     @property
-    def mars_names(self) -> Tuple[str, ...]:
+    def mars_names(self) -> tuple[str, ...]:
         """Get the MARS names for the coordinate."""
         return (self.variable.name,)

anemoi/datasets/create/sources/xarray_support/field.py CHANGED Viewed

@@ -12,9 +12,6 @@ import datetime
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import Optional
-from typing import Tuple
 from earthkit.data import Field
 from earthkit.data.core.fieldlist import math
@@ -80,12 +77,21 @@ class XArrayField(Field):
         # Copy the metadata from the owner
         self._md = owner._metadata.copy()
+        aliases = {}
         for coord_name, coord_value in self.selection.coords.items():
             if is_scalar(coord_value):
                 # Extract the single value from the scalar dimension
                 # and store it in the metadata
                 coordinate = owner.by_name[coord_name]
-                self._md[coord_name] = coordinate.normalise(extract_single_value(coord_value))
+                normalised = coordinate.normalise(extract_single_value(coord_value))
+                self._md[coord_name] = normalised
+                for alias in coordinate.mars_names:
+                    aliases[alias] = normalised
+        # Add metadata aliases (e.g. levelist == level) only if they are not already present
+        for alias, value in aliases.items():
+            if alias not in self._md:
+                self._md[alias] = value
         # By now, the only dimensions should be latitude and longitude
         self._shape = tuple(list(self.selection.shape)[-2:])
@@ -93,13 +99,11 @@ class XArrayField(Field):
             raise ValueError(f"Invalid shape for selection {self._shape=}, {self.selection.shape=} {self.selection=}")
     @property
-    def shape(self) -> Tuple[int, int]:
+    def shape(self) -> tuple[int, int]:
         """Return the shape of the field."""
         return self._shape
-    def to_numpy(
-        self, flatten: bool = False, dtype: Optional[type] = None, index: Optional[int] = None
-    ) -> NDArray[Any]:
+    def to_numpy(self, flatten: bool = False, dtype: type | None = None, index: int | None = None) -> NDArray[Any]:
         """Convert the selection to a numpy array.
         Returns
@@ -117,16 +121,16 @@ class XArrayField(Field):
             Index to select a specific element, by default None.
         """
         if index is not None:
-            values = self.selection[index]
+            values = self.selection[index].values
         else:
-            values = self.selection
+            values = self.selection.values
         assert dtype is None
         if flatten:
-            return values.values.flatten()
+            return values.flatten()
-        return values  # .reshape(self.shape)
+        return values
     @cached_property
     def _metadata(self) -> XArrayMetadata:
@@ -137,7 +141,7 @@ class XArrayField(Field):
         """Return the grid points of the field."""
         return self.owner.grid_points()
-    def to_latlon(self, flatten: bool = True) -> Dict[str, Any]:
+    def to_latlon(self, flatten: bool = True) -> dict[str, Any]:
         """Convert the selection to latitude and longitude coordinates.
         Returns
@@ -154,7 +158,7 @@ class XArrayField(Field):
         return dict(lat=self.latitudes, lon=self.longitudes)
     @property
-    def resolution(self) -> Optional[Any]:
+    def resolution(self) -> Any | None:
         """Return the resolution of the field."""
         return None
@@ -185,9 +189,9 @@ class XArrayField(Field):
     def __repr__(self) -> str:
         """Return a string representation of the field."""
-        return repr(self._metadata)
+        return f"XArrayField({self._metadata})"
-    def _values(self, dtype: Optional[type] = None) -> Any:
+    def _values(self, dtype: type | None = None) -> Any:
         """Return the values of the selection.
         Returns

anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl