anemoi-datasets 0.5.24__py3-none-any.whl → 0.5.26__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (58)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/finalise-additions.py +2 -1
  3. anemoi/datasets/commands/finalise.py +2 -1
  4. anemoi/datasets/commands/grib-index.py +1 -1
  5. anemoi/datasets/commands/init-additions.py +2 -1
  6. anemoi/datasets/commands/load-additions.py +2 -1
  7. anemoi/datasets/commands/load.py +2 -1
  8. anemoi/datasets/create/__init__.py +24 -33
  9. anemoi/datasets/create/filter.py +22 -24
  10. anemoi/datasets/create/input/__init__.py +0 -20
  11. anemoi/datasets/create/input/step.py +2 -16
  12. anemoi/datasets/create/sources/accumulations.py +7 -6
  13. anemoi/datasets/create/sources/planetary_computer.py +44 -0
  14. anemoi/datasets/create/sources/xarray_support/__init__.py +6 -22
  15. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -0
  16. anemoi/datasets/create/sources/xarray_support/field.py +1 -4
  17. anemoi/datasets/create/sources/xarray_support/flavour.py +44 -6
  18. anemoi/datasets/create/sources/xarray_support/patch.py +44 -1
  19. anemoi/datasets/create/sources/xarray_support/variable.py +6 -2
  20. anemoi/datasets/data/complement.py +44 -10
  21. anemoi/datasets/data/dataset.py +29 -0
  22. anemoi/datasets/data/forwards.py +8 -2
  23. anemoi/datasets/data/misc.py +74 -16
  24. anemoi/datasets/data/observations/__init__.py +316 -0
  25. anemoi/datasets/data/observations/legacy_obs_dataset.py +200 -0
  26. anemoi/datasets/data/observations/multi.py +64 -0
  27. anemoi/datasets/data/padded.py +227 -0
  28. anemoi/datasets/data/records/__init__.py +442 -0
  29. anemoi/datasets/data/records/backends/__init__.py +157 -0
  30. anemoi/datasets/data/stores.py +7 -56
  31. anemoi/datasets/data/subset.py +5 -0
  32. anemoi/datasets/grids.py +6 -3
  33. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/METADATA +3 -2
  34. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/RECORD +38 -51
  35. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/WHEEL +1 -1
  36. anemoi/datasets/create/filters/__init__.py +0 -33
  37. anemoi/datasets/create/filters/empty.py +0 -37
  38. anemoi/datasets/create/filters/legacy.py +0 -93
  39. anemoi/datasets/create/filters/noop.py +0 -37
  40. anemoi/datasets/create/filters/orog_to_z.py +0 -58
  41. anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
  42. anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
  43. anemoi/datasets/create/filters/rename.py +0 -205
  44. anemoi/datasets/create/filters/rotate_winds.py +0 -105
  45. anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
  46. anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
  47. anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
  48. anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
  49. anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
  50. anemoi/datasets/create/filters/sum.py +0 -68
  51. anemoi/datasets/create/filters/transform.py +0 -51
  52. anemoi/datasets/create/filters/unrotate_winds.py +0 -105
  53. anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
  54. anemoi/datasets/create/filters/wz_to_w.py +0 -98
  55. anemoi/datasets/create/testing.py +0 -76
  56. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/entry_points.txt +0 -0
  57. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/licenses/LICENSE +0 -0
  58. {anemoi_datasets-0.5.24.dist-info → anemoi_datasets-0.5.26.dist-info}/top_level.txt +0 -0

--- a/anemoi/datasets/_version.py
+++ b/anemoi/datasets/_version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.5.24'
-__version_tuple__ = version_tuple = (0, 5, 24)
+__version__ = version = '0.5.26'
+__version_tuple__ = version_tuple = (0, 5, 26)

--- a/anemoi/datasets/commands/finalise-additions.py
+++ b/anemoi/datasets/commands/finalise-additions.py
@@ -61,7 +61,8 @@ class FinaliseAdditions(Command):
 
         if "debug" in options:
             options.pop("debug")
-        task(step, options)
+
+        task(step, options)
 
         LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
 

--- a/anemoi/datasets/commands/finalise.py
+++ b/anemoi/datasets/commands/finalise.py
@@ -55,7 +55,8 @@ class Finalise(Command):
 
         if "debug" in options:
             options.pop("debug")
-        task(step, options)
+
+        task(step, options)
 
         LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
 

--- a/anemoi/datasets/commands/grib-index.py
+++ b/anemoi/datasets/commands/grib-index.py
@@ -81,7 +81,7 @@ class GribIndexCmd(Command):
             bool
                 True if the path matches, False otherwise.
             """
-            return fnmatch.fnmatch(path, args.match)
+            return fnmatch.fnmatch(os.path.basename(path), args.match)
 
         from anemoi.datasets.create.sources.grib_index import GribIndex
 
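
Note on the grib-index change: the `args.match` pattern is now applied to the file name only, not the full path, so patterns without directory components behave as expected. A minimal sketch of the difference (the path is hypothetical):

    import fnmatch
    import os

    path = "/archive/2024/forecast.grib"

    # Before: the pattern had to match the whole path
    print(fnmatch.fnmatch(path, "forecast.*"))                    # False
    # After: only the basename is matched
    print(fnmatch.fnmatch(os.path.basename(path), "forecast.*"))  # True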

--- a/anemoi/datasets/commands/init-additions.py
+++ b/anemoi/datasets/commands/init-additions.py
@@ -61,7 +61,8 @@ class InitAdditions(Command):
 
         if "debug" in options:
             options.pop("debug")
-        task(step, options)
+
+        task(step, options)
 
         LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
 

--- a/anemoi/datasets/commands/load-additions.py
+++ b/anemoi/datasets/commands/load-additions.py
@@ -62,7 +62,8 @@ class LoadAdditions(Command):
 
         if "debug" in options:
             options.pop("debug")
-        task(step, options)
+
+        task(step, options)
 
         LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
 

--- a/anemoi/datasets/commands/load.py
+++ b/anemoi/datasets/commands/load.py
@@ -62,7 +62,8 @@ class Load(Command):
 
         if "debug" in options:
             options.pop("debug")
-        task(step, options)
+
+        task(step, options)
 
         LOG.info(f"Create step '{step}' completed in {seconds_to_human(time.time()-now)}")
 

--- a/anemoi/datasets/create/__init__.py
+++ b/anemoi/datasets/create/__init__.py
@@ -44,7 +44,7 @@ from .check import check_data_values
 from .chunks import ChunkFilter
 from .config import build_output
 from .config import loader_config
-from .input import build_input
+from .input import InputBuilder
 from .statistics import Summary
 from .statistics import TmpStatistics
 from .statistics import check_variance
@@ -101,7 +101,9 @@ def json_tidy(o: Any) -> Any:
 
 
 def build_statistics_dates(
-    dates: list[datetime.datetime], start: Optional[datetime.datetime], end: Optional[datetime.datetime]
+    dates: list[datetime.datetime],
+    start: Optional[datetime.datetime],
+    end: Optional[datetime.datetime],
 ) -> tuple[str, str]:
     """Compute the start and end dates for the statistics.
 
@@ -551,36 +553,16 @@ class HasElementForDataMixin:
 
         self.output = build_output(config.output, parent=self)
 
-        self.input = build_input_(main_config=config, output_config=self.output)
-        # LOG.info("%s", self.input)
-
-
-def build_input_(main_config: Any, output_config: Any) -> Any:
-    """Build the input for the dataset.
-
-    Parameters
-    ----------
-    main_config : Any
-        The main configuration.
-    output_config : Any
-        The output configuration.
-
-    Returns
-    -------
-    Any
-        The input builder.
-    """
-    builder = build_input(
-        main_config.input,
-        data_sources=main_config.get("data_sources", {}),
-        order_by=output_config.order_by,
-        flatten_grid=output_config.flatten_grid,
-        remapping=build_remapping(output_config.remapping),
-        use_grib_paramid=main_config.build.use_grib_paramid,
-    )
-    LOG.debug("✅ INPUT_BUILDER")
-    LOG.debug(builder)
-    return builder
+        self.input = InputBuilder(
+            config.input,
+            data_sources=config.get("data_sources", {}),
+            order_by=self.output.order_by,
+            flatten_grid=self.output.flatten_grid,
+            remapping=build_remapping(self.output.remapping),
+            use_grib_paramid=config.build.use_grib_paramid,
+        )
+        LOG.debug("✅ INPUT_BUILDER")
+        LOG.debug(self.input)
 
 
 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
@@ -1541,7 +1523,16 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         if not all(self.registry.get_flags(sync=False)):
             raise Exception(f"❗Zarr {self.path} is not fully built, not writing statistics into dataset.")
 
-        for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
+        for k in [
+            "mean",
+            "stdev",
+            "minimum",
+            "maximum",
+            "sums",
+            "squares",
+            "count",
+            "has_nans",
+        ]:
             self.dataset.add_dataset(name=k, array=stats[k], dimensions=("variable",))
 
         self.registry.add_to_history("compute_statistics_end")

--- a/anemoi/datasets/create/filter.py
+++ b/anemoi/datasets/create/filter.py
@@ -7,44 +7,42 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
-from abc import ABC
-from abc import abstractmethod
 from typing import Any
+from typing import Dict
 
 import earthkit.data as ekd
 
 
-class Filter(ABC):
-    """A base class for filters."""
+class TransformFilter:
+    """Calls filters from anemoi.transform.filters
 
-    def __init__(self, context: Any, *args: Any, **kwargs: Any) -> None:
-        """Initialise the filter.
+    Parameters
+    ----------
+    context : Any
+        The context in which the filter is created.
+    name : str
+        The name of the filter.
+    config : Dict[str, Any]
+        The configuration for the filter.
+    """
 
-        Parameters
-        ----------
-        context : Any
-            The context in which the filter is created.
-        *args : tuple
-            Positional arguments.
-        **kwargs : dict
-            Keyword arguments.
-        """
+    def __init__(self, context: Any, name: str, config: Dict[str, Any]) -> None:
+        from anemoi.transform.filters import create_filter
 
-        self.context = context
+        self.name = name
+        self.transform_filter = create_filter(context, config)
 
-    @abstractmethod
-    def execute(self, data: ekd.FieldList) -> ekd.FieldList:
-        """Execute the filter.
+    def execute(self, input: ekd.FieldList) -> ekd.FieldList:
+        """Execute the transformation filter.
 
         Parameters
         ----------
-        data : ekd.FieldList
-            The input data.
+        input : ekd.FieldList
+            The input data to be transformed.
 
         Returns
         -------
         ekd.FieldList
-            The output data.
+            The transformed data.
         """
-
-        pass
+        return self.transform_filter.forward(input)
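
The module now reduces to this thin adapter over anemoi-transform. A minimal sketch of how it is driven, assuming a filter name registered in anemoi-transform; the "rename" spec is a hypothetical config, and in real use the context object comes from the creation pipeline (None may not suit every filter):

    from anemoi.transform.filters import filter_registry

    from anemoi.datasets.create.filter import TransformFilter

    name, config = "rename", {"rename": {"2t": "t2m"}}  # hypothetical filter spec

    if filter_registry.is_registered(name):
        f = TransformFilter(context=None, name=name, config=config)
        # `fields` would be an earthkit.data FieldList from an earlier step:
        # result = f.execute(fields)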

--- a/anemoi/datasets/create/input/__init__.py
+++ b/anemoi/datasets/create/input/__init__.py
@@ -104,23 +104,3 @@ class InputBuilder:
             Trace string.
         """
         return f"InputBuilder({group_of_dates})"
-
-
-def build_input(config: dict, data_sources: Union[dict, list], **kwargs: Any) -> InputBuilder:
-    """Build an InputBuilder instance.
-
-    Parameters
-    ----------
-    config : dict
-        Configuration dictionary.
-    data_sources : Union[dict, list]
-        Data sources.
-    **kwargs : Any
-        Additional keyword arguments.
-
-    Returns
-    -------
-    InputBuilder
-        An instance of InputBuilder.
-    """
-    return InputBuilder(config, data_sources, **kwargs)

--- a/anemoi/datasets/create/input/step.py
+++ b/anemoi/datasets/create/input/step.py
@@ -8,7 +8,6 @@
 # nor does it submit to any jurisdiction.
 
 import logging
-import warnings
 from copy import deepcopy
 from typing import Any
 from typing import Dict
@@ -165,24 +164,11 @@ def step_factory(config: Dict[str, Any], context: ActionContext, action_path: Li
     if cls is not None:
         return cls(context, action_path, previous_step, *args, **kwargs)
 
-    # Try filters from datasets filter registry
+    # Try filters from transform filter registry
     from anemoi.transform.filters import filter_registry as transform_filter_registry
 
-    from ..filters import create_filter as create_datasets_filter
-    from ..filters import filter_registry as datasets_filter_registry
-
-    if datasets_filter_registry.is_registered(key):
-
-        if transform_filter_registry.is_registered(key):
-            warnings.warn(f"Filter `{key}` is registered in both datasets and transform filter registries")
-
-        filter = create_datasets_filter(None, config)
-        return FunctionStepAction(context, action_path + [key], previous_step, key, filter)
-
-    # Use filters from transform registry
-
     if transform_filter_registry.is_registered(key):
-        from ..filters.transform import TransformFilter
+        from ..filter import TransformFilter
 
         return FunctionStepAction(
             context, action_path + [key], previous_step, key, TransformFilter(context, key, config)

--- a/anemoi/datasets/create/sources/accumulations.py
+++ b/anemoi/datasets/create/sources/accumulations.py
@@ -459,12 +459,13 @@ class AccumulationFromStart(Accumulation):
             A tuple representing the MARS date-time step.
         """
         assert user_date is None, user_date
-        assert not frequency, frequency
 
         steps = (step1 + add_step, step2 + add_step)
         if steps[0] == 0:
             steps = (steps[1],)
 
+        assert frequency == 0 or frequency == (step2 - step1), frequency
+
         return (
             base_date.year * 10000 + base_date.month * 100 + base_date.day,
             base_date.hour * 100 + base_date.minute,
@@ -824,6 +825,11 @@ def _compute_accumulations(
     step1, step2 = user_accumulation_period
     assert step1 < step2, user_accumulation_period
 
+    if accumulations_reset_frequency is not None:
+        AccumulationClass = AccumulationFromLastReset
+    else:
+        AccumulationClass = AccumulationFromStart if data_accumulation_period in (0, None) else AccumulationFromLastStep
+
     if data_accumulation_period is None:
         data_accumulation_period = user_accumulation_period[1] - user_accumulation_period[0]
 
@@ -838,11 +844,6 @@ def _compute_accumulations(
 
     base_times = [t // 100 if t > 100 else t for t in base_times]
 
-    if accumulations_reset_frequency is not None:
-        AccumulationClass = AccumulationFromLastReset
-    else:
-        AccumulationClass = AccumulationFromStart if data_accumulation_period in (0, None) else AccumulationFromLastStep
-
     mars_date_time_steps = AccumulationClass.mars_date_time_steps(
         dates=dates,
         step1=step1,
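
Moving the class selection above the defaulting of data_accumulation_period appears to be the point of this hunk: previously a None value was rewritten to step2 - step1 before the `in (0, None)` test ran, so AccumulationFromStart could only ever be chosen for an explicit 0. A reduced sketch of the two orderings (plain Python, names mirror the diff):

    data_accumulation_period = None   # i.e. fields accumulated from the start of the forecast
    step1, step2 = 0, 6

    # Old order: default first, select after
    defaulted = step2 - step1 if data_accumulation_period is None else data_accumulation_period
    old_choice = "FromStart" if defaulted in (0, None) else "FromLastStep"  # -> "FromLastStep" (wrong)

    # New order: select while the value is still None, default afterwards
    new_choice = "FromStart" if data_accumulation_period in (0, None) else "FromLastStep"  # -> "FromStart"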

--- /dev/null
+++ b/anemoi/datasets/create/sources/planetary_computer.py
@@ -0,0 +1,44 @@
+# (C) Copyright 2024 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+from . import source_registry
+from .xarray import XarraySourceBase
+
+
+@source_registry.register("planetary_computer")
+class PlanetaryComputerSource(XarraySourceBase):
+    """An Xarray data source for the planetary_computer."""
+
+    emoji = "🪐"
+
+    def __init__(self, context, data_catalog_id, version="v1", *args, **kwargs: dict):
+
+        import planetary_computer
+        import pystac_client
+
+        self.data_catalog_id = data_catalog_id
+        self.flavour = kwargs.pop("flavour", None)
+        self.patch = kwargs.pop("patch", None)
+        self.options = kwargs.pop("options", {})
+
+        catalog = pystac_client.Client.open(
+            f"https://planetarycomputer.microsoft.com/api/stac/{version}/",
+            modifier=planetary_computer.sign_inplace,
+        )
+        collection = catalog.get_collection(self.data_catalog_id)
+
+        asset = collection.assets["zarr-abfs"]
+
+        if "xarray:storage_options" in asset.extra_fields:
+            self.options["storage_options"] = asset.extra_fields["xarray:storage_options"]
+
+        self.options.update(asset.extra_fields["xarray:open_kwargs"])
+
+        super().__init__(context, url=asset.href, *args, **kwargs)
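
The new source is STAC asset resolution plus the generic Xarray loader. A standalone sketch of the same resolution outside anemoi; the collection id "era5-pds" is an assumption, and any collection exposing a "zarr-abfs" asset should behave the same way:

    import planetary_computer
    import pystac_client
    import xarray as xr

    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1/",
        modifier=planetary_computer.sign_inplace,
    )
    asset = catalog.get_collection("era5-pds").assets["zarr-abfs"]

    ds = xr.open_dataset(
        asset.href,
        engine="zarr",  # typically also present in the asset's "xarray:open_kwargs"
        storage_options=asset.extra_fields.get("xarray:storage_options", {}),
    )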

--- a/anemoi/datasets/create/sources/xarray_support/__init__.py
+++ b/anemoi/datasets/create/sources/xarray_support/__init__.py
@@ -20,7 +20,6 @@ import xarray as xr
 from earthkit.data.core.fieldlist import MultiFieldList
 
 from anemoi.datasets.create.sources.patterns import iterate_patterns
-from anemoi.datasets.data.stores import name_to_zarr_store
 
 from ..legacy import legacy_source
 from .fieldlist import XarrayFieldList
@@ -89,37 +88,22 @@ def load_one(
         The loaded dataset.
     """
 
-    """
-    We manage the S3 client ourselves, bypassing fsspec and s3fs layers, because sometimes something on the stack
-    zarr/fsspec/s3fs/boto3 (?) seem to flags files as missing when they actually are not (maybe when S3 reports some sort of
-    connection error). In that case, Zarr will silently fill the chunks that could not be downloaded with NaNs.
-    See https://github.com/pydata/xarray/issues/8842
-
-    We have seen this bug triggered when we run many clients in parallel, for example, when we create a new dataset using `xarray-zarr`.
-    """
-
     if options is None:
         options = {}
 
     context.trace(emoji, dataset, options, kwargs)
 
-    if isinstance(dataset, str) and ".zarr" in dataset:
-        data = xr.open_zarr(name_to_zarr_store(dataset), **options)
-    elif "planetarycomputer" in dataset:
-        store = name_to_zarr_store(dataset)
-        if "store" in store:
-            data = xr.open_zarr(**store)
-        if "filename_or_obj" in store:
-            data = xr.open_dataset(**store)
-    else:
-        data = xr.open_dataset(dataset, **options)
+    if isinstance(dataset, str) and dataset.endswith(".zarr"):
+        # If the dataset is a zarr store, we need to use the zarr engine
+        options["engine"] = "zarr"
+
+    data = xr.open_dataset(dataset, **options)
 
     fs = XarrayFieldList.from_xarray(data, flavour=flavour, patch=patch)
 
     if len(dates) == 0:
         result = fs.sel(**kwargs)
     else:
-        print("dates", dates, kwargs)
         result = MultiFieldList([fs.sel(valid_datetime=date, **kwargs) for date in dates])
 
     if len(result) == 0:
@@ -130,7 +114,7 @@ def load_one(
             a = ["valid_datetime", k.metadata("valid_datetime", default=None)]
             for n in kwargs.keys():
                 a.extend([n, k.metadata(n, default=None)])
-            print([str(x) for x in a])
+            LOG.warning(f"{[str(x) for x in a]}")
 
         if i > 16:
             break
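
With the bespoke store handling removed, any path ending in ".zarr" now goes through xarray's zarr backend. For a local or fsspec-style path, the two spellings below are interchangeable apart from chunking defaults (open_zarr defaults to chunks="auto", open_dataset to chunks=None); "data.zarr" is a placeholder:

    import xarray as xr

    ds_a = xr.open_dataset("data.zarr", engine="zarr")  # the new load_one code path
    ds_b = xr.open_zarr("data.zarr")                    # same store, lazily chunked by default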

--- a/anemoi/datasets/create/sources/xarray_support/coordinates.py
+++ b/anemoi/datasets/create/sources/xarray_support/coordinates.py
@@ -95,6 +95,7 @@ class Coordinate:
     is_member = False
     is_x = False
     is_y = False
+    is_point = False
 
     def __init__(self, variable: xr.DataArray) -> None:
         """Initialize the coordinate.
@@ -390,6 +391,13 @@ class EnsembleCoordinate(Coordinate):
         return value
 
 
+class PointCoordinate(Coordinate):
+    """Coordinate class for point data."""
+
+    is_point = True
+    mars_names = ("point",)
+
+
 class LongitudeCoordinate(Coordinate):
     """Coordinate class for longitude."""
 

--- a/anemoi/datasets/create/sources/xarray_support/field.py
+++ b/anemoi/datasets/create/sources/xarray_support/field.py
@@ -87,13 +87,10 @@ class XArrayField(Field):
             coordinate = owner.by_name[coord_name]
             self._md[coord_name] = coordinate.normalise(extract_single_value(coord_value))
 
-        # print(values.ndim, values.shape, selection.dims)
         # By now, the only dimensions should be latitude and longitude
         self._shape = tuple(list(self.selection.shape)[-2:])
         if math.prod(self._shape) != math.prod(self.selection.shape):
-            print(self.selection.ndim, self.selection.shape)
-            print(self.selection)
-            raise ValueError("Invalid shape for selection")
+            raise ValueError(f"Invalid shape for selection {self._shape=}, {self.selection.shape=} {self.selection=}")
 
     @property
     def shape(self) -> Tuple[int, int]:

--- a/anemoi/datasets/create/sources/xarray_support/flavour.py
+++ b/anemoi/datasets/create/sources/xarray_support/flavour.py
@@ -26,6 +26,7 @@ from .coordinates import EnsembleCoordinate
 from .coordinates import LatitudeCoordinate
 from .coordinates import LevelCoordinate
 from .coordinates import LongitudeCoordinate
+from .coordinates import PointCoordinate
 from .coordinates import ScalarCoordinate
 from .coordinates import StepCoordinate
 from .coordinates import TimeCoordinate
@@ -134,6 +135,10 @@ class CoordinateGuesser(ABC):
 
         d: Optional[Coordinate] = None
 
+        d = self._is_point(coordinate, attributes)
+        if d is not None:
+            return d
+
         d = self._is_longitude(coordinate, attributes)
         if d is not None:
             return d
@@ -308,9 +313,9 @@ class CoordinateGuesser(ABC):
             return self._grid_cache[(x.name, y.name, dim_vars)]
 
         grid_mapping = variable.attrs.get("grid_mapping", None)
-        if grid_mapping is not None:
-            print(f"grid_mapping: {grid_mapping}")
-            print(self.ds[grid_mapping])
+        # if grid_mapping is not None:
+        #     print(f"grid_mapping: {grid_mapping}")
+        #     print(self.ds[grid_mapping])
 
         if grid_mapping is None:
             LOG.warning(f"No 'grid_mapping' attribute provided for '{variable.name}'")
@@ -392,6 +397,10 @@ class CoordinateGuesser(ABC):
         """
         pass
 
+    @abstractmethod
+    def _is_point(self, c: xr.DataArray, attributes: CoordinateAttributes) -> Optional[PointCoordinate]:
+        pass
+
     @abstractmethod
     def _is_latitude(self, c: xr.DataArray, attributes: CoordinateAttributes) -> Optional[LatitudeCoordinate]:
         """Checks if the coordinate is a latitude.
@@ -550,6 +559,15 @@ class DefaultCoordinateGuesser(CoordinateGuesser):
         """
         super().__init__(ds)
 
+    def _is_point(self, c: xr.DataArray, attributes: CoordinateAttributes) -> Optional[PointCoordinate]:
+        if attributes.standard_name in ["cell", "station", "poi", "point"]:
+            return PointCoordinate(c)
+
+        if attributes.name in ["cell", "station", "poi", "point"]:  # WeatherBench
+            return PointCoordinate(c)
+
+        return None
+
     def _is_longitude(self, c: xr.DataArray, attributes: CoordinateAttributes) -> Optional[LongitudeCoordinate]:
         """Checks if the coordinate is a longitude.
 
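
The names checked here (cell, station, poi, point) suggest the target is scattered or station data, where latitude and longitude hang off a single point-like dimension instead of being grid dimensions. A toy dataset of that shape, for illustration only:

    import numpy as np
    import xarray as xr

    n = 5
    ds = xr.Dataset(
        {"t2m": (("time", "station"), np.random.rand(2, n))},
        coords={
            "time": np.array(["2024-01-01", "2024-01-02"], dtype="datetime64[ns]"),
            "station": np.arange(n),
            "latitude": ("station", np.linspace(-60.0, 60.0, n)),
            "longitude": ("station", np.linspace(0.0, 300.0, n)),
        },
    )
    # The "station" dimension is now classified as a point coordinate rather
    # than being mistaken for part of a regular grid.
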
@@ -750,6 +768,9 @@ class DefaultCoordinateGuesser(CoordinateGuesser):
         if attributes.standard_name == "air_pressure" and attributes.units == "hPa":
             return LevelCoordinate(c, "pl")
 
+        if attributes.long_name == "pressure" and attributes.units in ["hPa", "Pa"]:
+            return LevelCoordinate(c, "pl")
+
         if attributes.name == "level":
             return LevelCoordinate(c, "pl")
 
@@ -759,9 +780,6 @@ class DefaultCoordinateGuesser(CoordinateGuesser):
         if attributes.standard_name == "depth":
             return LevelCoordinate(c, "depth")
 
-        if attributes.name == "vertical" and attributes.units == "hPa":
-            return LevelCoordinate(c, "pl")
-
         return None
 
     def _is_number(self, c: xr.DataArray, attributes: CoordinateAttributes) -> Optional[EnsembleCoordinate]:
@@ -1040,3 +1058,23 @@ class FlavourCoordinateGuesser(CoordinateGuesser):
             return EnsembleCoordinate(c)
 
         return None
+
+    def _is_point(self, c: xr.DataArray, attributes: CoordinateAttributes) -> Optional[PointCoordinate]:
+        """Checks if the coordinate is a point coordinate using the flavour rules.
+
+        Parameters
+        ----------
+        c : xr.DataArray
+            The coordinate to check.
+        attributes : CoordinateAttributes
+            The attributes of the coordinate.
+
+        Returns
+        -------
+        Optional[PointCoordinate]
+            The PointCoordinate if matched, else None.
+        """
+        if self._match(c, "point", attributes):
+            return PointCoordinate(c)
+
+        return None

--- a/anemoi/datasets/create/sources/xarray_support/patch.py
+++ b/anemoi/datasets/create/sources/xarray_support/patch.py
@@ -61,9 +61,50 @@ def patch_coordinates(ds: xr.Dataset, coordinates: List[str]) -> Any:
     return ds
 
 
+def patch_rename(ds: xr.Dataset, renames: dict[str, str]) -> Any:
+    """Rename variables in the dataset.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The dataset to patch.
+    renames : dict[str, str]
+        Mapping from old variable names to new variable names.
+
+    Returns
+    -------
+    Any
+        The patched dataset.
+    """
+    return ds.rename(renames)
+
+
+def patch_sort_coordinate(ds: xr.Dataset, sort_coordinates: List[str]) -> Any:
+    """Sort the coordinates of the dataset.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The dataset to patch.
+    sort_coordinates : List[str]
+        The coordinates to sort.
+
+    Returns
+    -------
+    Any
+        The patched dataset.
+    """
+
+    for name in sort_coordinates:
+        ds = ds.sortby(name)
+    return ds
+
+
 PATCHES = {
     "attributes": patch_attributes,
     "coordinates": patch_coordinates,
+    "rename": patch_rename,
+    "sort_coordinates": patch_sort_coordinate,
 }
 
@@ -82,7 +123,9 @@ def patch_dataset(ds: xr.Dataset, patch: Dict[str, Dict[str, Any]]) -> Any:
     Any
         The patched dataset.
     """
-    for what, values in patch.items():
+
+    ORDER = ["coordinates", "attributes", "rename", "sort_coordinates"]
+    for what, values in sorted(patch.items(), key=lambda x: ORDER.index(x[0])):
         if what not in PATCHES:
             raise ValueError(f"Unknown patch type {what!r}")

--- a/anemoi/datasets/create/sources/xarray_support/variable.py
+++ b/anemoi/datasets/create/sources/xarray_support/variable.py
@@ -82,8 +82,12 @@ class Variable:
 
         self.time = time
 
-        self.shape = tuple(len(c.variable) for c in coordinates if c.is_dim and not c.scalar and not c.is_grid)
-        self.names = {c.variable.name: c for c in coordinates if c.is_dim and not c.scalar and not c.is_grid}
+        self.shape = tuple(
+            len(c.variable) for c in coordinates if c.is_dim and not c.scalar and not c.is_grid and not c.is_point
+        )
+        self.names = {
+            c.variable.name: c for c in coordinates if c.is_dim and not c.scalar and not c.is_grid and not c.is_point
+        }
         self.by_name = {c.variable.name: c for c in coordinates}
 
         # We need that alias for the time dimension