anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +64 -26
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +5 -6
- anemoi/datasets/create/input/__init__.py +44 -65
- anemoi/datasets/create/input/action.py +296 -238
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +7 -9
- anemoi/datasets/create/input/misc.py +2 -75
- anemoi/datasets/create/input/repeated_dates.py +11 -130
- anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
- anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +120 -145
- anemoi/datasets/create/sources/accumulations2.py +20 -53
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -40
- anemoi/datasets/create/sources/empty.py +22 -19
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +29 -29
- anemoi/datasets/create/sources/grib.py +94 -78
- anemoi/datasets/create/sources/grib_index.py +57 -55
- anemoi/datasets/create/sources/hindcasts.py +57 -59
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +121 -149
- anemoi/datasets/create/sources/netcdf.py +28 -25
- anemoi/datasets/create/sources/opendap.py +28 -26
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +46 -48
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -51
- anemoi/datasets/create/sources/tendencies.py +68 -98
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +20 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +28 -25
- anemoi/datasets/create/sources/zenodo.py +43 -41
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +0 -43
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +48 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -168
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +22 -29
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +15 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +4 -185
- anemoi/datasets/schemas/recipe.json +131 -0
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
- anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
- anemoi/datasets/create/filter.py +0 -48
- anemoi/datasets/create/input/concat.py +0 -164
- anemoi/datasets/create/input/context.py +0 -89
- anemoi/datasets/create/input/empty.py +0 -54
- anemoi/datasets/create/input/filter.py +0 -118
- anemoi/datasets/create/input/function.py +0 -233
- anemoi/datasets/create/input/join.py +0 -130
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -177
- anemoi/datasets/create/input/template.py +0 -162
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
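Most of the per-file changes below follow a single pattern: the deprecated `typing` aliases (`Dict`, `List`, `Set`, `Tuple`, `Optional`, `Union`) are replaced by the built-in generics and union syntax of PEP 585 and PEP 604, and `Callable` now comes from `collections.abc`. A minimal before/after sketch of the style change (illustrative code, not taken from the package):

    # Before (typing aliases, pre-PEP 585/604)
    from typing import Dict, List, Optional

    def old_style(names: List[str], limit: Optional[int] = None) -> Dict[str, int]:
        return {n: i for i, n in enumerate(names[:limit])}

    # After (built-in generics and the | union operator)
    def new_style(names: list[str], limit: int | None = None) -> dict[str, int]:
        return {n: i for i, n in enumerate(names[:limit])}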
anemoi/datasets/data/indexing.py
CHANGED
@@ -8,12 +8,9 @@
 # nor does it submit to any jurisdiction.


+from collections.abc import Callable
 from functools import wraps
 from typing import Any
-from typing import Callable
-from typing import List
-from typing import Tuple
-from typing import Union

 import numpy as np
 from numpy.typing import NDArray
@@ -23,7 +20,7 @@ from .dataset import Shape
 from .dataset import TupleIndex


-def _tuple_with_slices(t: TupleIndex, shape: Shape) -> Tuple[TupleIndex, Tuple[int, ...]]:
+def _tuple_with_slices(t: TupleIndex, shape: Shape) -> tuple[TupleIndex, tuple[int, ...]]:
     """Replace all integers in a tuple with slices, so we preserve the dimensionality.

     Parameters:
@@ -87,7 +84,7 @@ def _index_to_tuple(index: FullIndex, shape: Shape) -> TupleIndex:
     raise ValueError(f"Invalid index: {index}")


-def index_to_slices(index: Union[int, slice, Tuple], shape: Shape) -> Tuple[TupleIndex, Tuple[int, ...]]:
+def index_to_slices(index: int | slice | tuple, shape: Shape) -> tuple[TupleIndex, tuple[int, ...]]:
     """Convert an index to a tuple of slices, with the same dimensionality as the shape.

     Parameters:
@@ -100,7 +97,7 @@ def index_to_slices(index: Union[int, slice, Tuple], shape: Shape) -> Tuple[TupleIndex, Tuple[int, ...]]:
     return _tuple_with_slices(_index_to_tuple(index, shape), shape)


-def apply_index_to_slices_changes(result: NDArray[Any], changes: Tuple[int, ...]) -> NDArray[Any]:
+def apply_index_to_slices_changes(result: NDArray[Any], changes: tuple[int, ...]) -> NDArray[Any]:
     """Apply changes to the result array based on the slices.

     Parameters:
@@ -118,7 +115,7 @@ def apply_index_to_slices_changes(result: NDArray[Any], changes: Tuple[int, ...]) -> NDArray[Any]:
     return result


-def update_tuple(t: Tuple, index: int, value: Any) -> Tuple[Tuple, Any]:
+def update_tuple(t: tuple, index: int, value: Any) -> tuple[tuple, Any]:
     """Replace the elements of a tuple at the given index with a new value.

     Parameters:
@@ -135,7 +132,7 @@ def update_tuple(t: Tuple, index: int, value: Any) -> Tuple[Tuple, Any]:
     return tuple(t), prev


-def length_to_slices(index: slice, lengths: List[int]) -> List[Union[slice, None]]:
+def length_to_slices(index: slice, lengths: list[int]) -> list[slice | None]:
     """Convert an index to a list of slices, given the lengths of the dimensions.

     Parameters:
@@ -174,7 +171,7 @@ def length_to_slices(index: slice, lengths: List[int]) -> List[Union[slice, None]]:
     return result


-def _as_tuples(index: Tuple) -> Tuple:
+def _as_tuples(index: tuple) -> tuple:
     """Convert elements of the index to tuples if they are lists or arrays.

     Parameters:
@@ -219,7 +216,7 @@ def expand_list_indexing(method: Callable[..., NDArray[Any]]) -> Callable[..., NDArray[Any]]:
        if not any(isinstance(i, (list, tuple)) for i in index):
            return method(self, index)

-       which: List[int] = []
+       which: list[int] = []
        for i, idx in enumerate(index):
            if isinstance(idx, (list, tuple)):
                which.append(i)
@@ -241,7 +238,7 @@ def expand_list_indexing(method: Callable[..., NDArray[Any]]) -> Callable[..., NDArray[Any]]:
    return wrapper


-def make_slice_or_index_from_list_or_tuple(indices: List[int]) -> Union[List[int], slice]:
+def make_slice_or_index_from_list_or_tuple(indices: list[int]) -> list[int] | slice:
    """Convert a list or tuple of indices to a slice or an index, if possible.

    Parameters:
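The helpers above implement a common NumPy trick: integer indices drop a dimension, so they are first widened to length-1 slices (preserving dimensionality), and the positions that were integers are recorded as "changes" so the extra axes can be squeezed out of the result afterwards. A minimal standalone sketch of the idea (the helper name `widen` is illustrative, not the package's):

    import numpy as np

    def widen(index: tuple, shape: tuple) -> tuple[tuple, tuple]:
        # Replace each integer i with slice(i, i + 1) and remember its axis.
        out, changes = [], []
        for axis, i in enumerate(index):
            if isinstance(i, int):
                out.append(slice(i, i + 1))
                changes.append(axis)
            else:
                out.append(i)
        return tuple(out), tuple(changes)

    a = np.arange(24).reshape(2, 3, 4)
    idx, changes = widen((1, slice(None), 2), a.shape)
    result = a[idx]                             # shape (1, 3, 1): dimensionality preserved
    result = np.squeeze(result, axis=changes)   # shape (3,): the integer axes removed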
anemoi/datasets/data/interpolate.py
CHANGED

@@ -12,12 +12,6 @@ import datetime
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set
-from typing import Tuple
-from typing import Union

 import numpy as np
 from anemoi.utils.dates import frequency_to_timedelta
@@ -193,7 +187,7 @@ class InterpolateFrequency(Forwards):
         return Node(self, [self.forward.tree()], frequency=self.frequency)

     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Get the missing data indices."""
         result = []
         j = 0
@@ -204,10 +198,10 @@ class InterpolateFrequency(Forwards):
                 result.append(j)
                 j += 1

-        result = set(x for x in result if x < self._len)
+        result = {x for x in result if x < self._len}
         return result

-    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get the metadata specific to the InterpolateFrequency subclass.

         Returns
@@ -221,9 +215,7 @@ class InterpolateFrequency(Forwards):


 class InterpolateNearest(Forwards):
-    def __init__(
-        self, dataset: Dataset, interpolate_variables: List[str], max_distance: Optional[float] = None
-    ) -> None:
+    def __init__(self, dataset: Dataset, interpolate_variables: list[str], max_distance: float | None = None) -> None:
         """Initialize the InterpolateNearest class.

         Parameters
@@ -262,7 +254,7 @@ class InterpolateNearest(Forwards):
         return self.forward.shape

     @property
-    def metadata(self) -> Dict[str, Any]:
+    def metadata(self) -> dict[str, Any]:
         return self.forward.metadata()

     @staticmethod
@@ -281,12 +273,12 @@ class InterpolateNearest(Forwards):
         result = target_data[(slice(None),) + index[1:]]
         return apply_index_to_slices_changes(result, changes)

-    def __getitem__(self, index: Union[int, slice, Tuple[Union[int, slice], ...]]) -> NDArray[Any]:
+    def __getitem__(self, index: int | slice | tuple[int | slice, ...]) -> NDArray[Any]:
         if isinstance(index, (int, slice)):
             index = (index, slice(None), slice(None), slice(None))
         return self._get_tuple(index)

-    def subclass_metadata_specific(self) -> Dict[str, Any]:
+    def subclass_metadata_specific(self) -> dict[str, Any]:
         return {
             "interpolate_variables": self.vars,
         }
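Besides the import cleanups, a few expressions are modernised in place, e.g. `set(x for x in ...)` becomes the equivalent set comprehension `{x for x in ...}`. Both build the same set; the comprehension simply avoids passing a generator to the `set` constructor:

    result = [0, 3, 5, 9]
    _len = 6
    assert set(x for x in result if x < _len) == {x for x in result if x < _len} == {0, 3, 5}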
anemoi/datasets/data/join.py
CHANGED
@@ -12,10 +12,6 @@ import datetime
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set

 import numpy as np
 from numpy.typing import NDArray
@@ -182,10 +178,10 @@ class Join(Combined):
         return Select(self, indices, {"overlay": variables})

     @cached_property
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
         """Get the variables of the joined dataset."""
         seen = set()
-        result: List[str] = []
+        result: list[str] = []
         for d in reversed(self.datasets):
             for v in reversed(d.variables):
                 while v in seen:
@@ -196,7 +192,7 @@ class Join(Combined):
         return result

     @property
-    def variables_metadata(self) -> Dict[str, Any]:
+    def variables_metadata(self) -> dict[str, Any]:
         """Get the metadata of the variables."""
         result = {}
         variables = [v for v in self.variables if not (v.startswith("(") and v.endswith(")"))]
@@ -216,18 +212,18 @@ class Join(Combined):
         return result

     @cached_property
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
         """Get the mapping of variable names to indices."""
         return {k: i for i, k in enumerate(self.variables)}

     @property
-    def statistics(self) -> Dict[str, NDArray[Any]]:
+    def statistics(self) -> dict[str, NDArray[Any]]:
         """Get the statistics of the joined dataset."""
         return {
             k: np.concatenate([d.statistics[k] for d in self.datasets], axis=0) for k in self.datasets[0].statistics
         }

-    def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         """Get the statistics tendencies of the joined dataset.

         Parameters
@@ -268,9 +264,9 @@ class Join(Combined):
         assert False

     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Get the missing data indices."""
-        result: Set[int] = set()
+        result: set[int] = set()
         for d in self.datasets:
             result = result | d.missing
         return result
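The `Join.variables` logic visible in this hunk resolves name clashes by walking the datasets in reverse, so the last dataset wins a duplicated name, and `variables_metadata` then skips names wrapped in parentheses, which is how shadowed variables appear to be marked. A standalone sketch of that overlay rule, inferred from the excerpt (the helper name and the exact renaming are assumptions):

    def overlay_variables(datasets: list[list[str]]) -> list[str]:
        # Later datasets win; earlier duplicates are renamed "(name)".
        seen: set[str] = set()
        result: list[str] = []
        for variables in reversed(datasets):
            for v in reversed(variables):
                while v in seen:
                    v = f"({v})"
                seen.add(v)
                result.insert(0, v)
        return result

    assert overlay_variables([["t2m", "msl"], ["t2m", "tp"]]) == ["(t2m)", "msl", "t2m", "tp"]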
anemoi/datasets/data/masked.py
CHANGED
@@ -11,11 +11,6 @@
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Tuple
-from typing import Union

 import numpy as np
 from numpy.typing import NDArray
@@ -117,7 +112,7 @@ class Masked(Forwards):
         result = apply_index_to_slices_changes(result, changes)
         return result

-    def collect_supporting_arrays(self, collected: List[Tuple], *path: Any) -> None:
+    def collect_supporting_arrays(self, collected: list[tuple], *path: Any) -> None:
         """Collect supporting arrays.

         Parameters
@@ -134,7 +129,7 @@ class Masked(Forwards):
 class Thinning(Masked):
     """A class to represent a thinned dataset."""

-    def __init__(self, forward: Dataset, thinning: Optional[int], method: str) -> None:
+    def __init__(self, forward: Dataset, thinning: int | None, method: str) -> None:
         """Initialize the Thinning class.

         Parameters
@@ -195,7 +190,7 @@ class Thinning(Masked):
         """
         return Node(self, [self.forward.tree()], thinning=self.thinning, method=self.method)

-    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get the metadata specific to the Thinning subclass.

         Returns
@@ -209,7 +204,7 @@ class Thinning(Masked):
 class Cropping(Masked):
     """A class to represent a cropped dataset."""

-    def __init__(self, forward: Dataset, area: Union[Dataset, Tuple[float, float, float, float]]) -> None:
+    def __init__(self, forward: Dataset, area: Dataset | tuple[float, float, float, float]) -> None:
         """Initialize the Cropping class.

         Parameters
@@ -245,7 +240,7 @@ class Cropping(Masked):
         """
         return Node(self, [self.forward.tree()], area=self.area)

-    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get the metadata specific to the Cropping subclass.

         Returns
@@ -314,7 +309,7 @@ class TrimEdge(Masked):
         """
         return Node(self, [self.forward.tree()], edge=self.edge)

-    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get the metadata specific to the TrimEdge subclass.

         Returns
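For context, `Thinning` and `Cropping` are the wrappers behind the `thinning` and `area` subsetting options documented for `open_dataset`. A short usage sketch (the paths and values are illustrative; treat the exact keywords as the documented options rather than something shown in this diff):

    from anemoi.datasets import open_dataset

    # Keep a thinned subset of the grid points (Thinning wraps the forward dataset).
    ds = open_dataset("dataset.zarr", thinning=4)

    # Crop to a lat/lon box given as (north, west, south, east) (Cropping).
    ds = open_dataset("dataset.zarr", area=(70, -20, 30, 40))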
anemoi/datasets/data/merge.py
CHANGED
@@ -12,10 +12,6 @@ import datetime
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Set
-from typing import Tuple

 import numpy as np
 from numpy.typing import NDArray
@@ -40,7 +36,7 @@ LOG = logging.getLogger(__name__)
 class Merge(Combined):
     """A class to merge multiple datasets along the dates axis, handling gaps in dates if allowed."""

-    def __init__(self, datasets: List[Dataset], allow_gaps_in_dates: bool = False) -> None:
+    def __init__(self, datasets: list[Dataset], allow_gaps_in_dates: bool = False) -> None:
         """Initialize the Merge object.

         Parameters
@@ -128,10 +124,10 @@ class Merge(Combined):
         return self._frequency

     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Get the indices of missing dates in the merged dataset."""
         # TODO: optimize
-        result: Set[int] = set()
+        result: set[int] = set()

         for i, (dataset, row) in enumerate(self._indices):
             if dataset == self._missing_index:
@@ -192,7 +188,7 @@ class Merge(Combined):
         """
         return Node(self, [d.tree() for d in self.datasets], allow_gaps_in_dates=self.allow_gaps_in_dates)

-    def metadata_specific(self) -> Dict[str, Any]:
+    def metadata_specific(self) -> dict[str, Any]:
         """Get the specific metadata for the merged dataset.

         Returns
@@ -265,7 +261,7 @@ class Merge(Combined):
         return np.stack([self[i] for i in range(*s.indices(self._len))])


-def merge_factory(args: Tuple, kwargs: Dict[str, Any]) -> Dataset:
+def merge_factory(args: tuple, kwargs: dict[str, Any]) -> Dataset:
    """Factory function to create a merged dataset.

    Parameters
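`Merge` interleaves datasets that share variables but cover different dates, raising on gaps unless `allow_gaps_in_dates` is set. A usage sketch, assuming the keywords are passed through `open_dataset` as the `merge_factory(args, kwargs)` signature suggests (paths are illustrative):

    from anemoi.datasets import open_dataset

    # Interleave two datasets covering, say, odd and even days of the same period.
    ds = open_dataset(merge=["dataset-odd.zarr", "dataset-even.zarr"], allow_gaps_in_dates=True)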
anemoi/datasets/data/misc.py
CHANGED
@@ -15,11 +15,6 @@ import os
 from pathlib import PurePath
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Tuple
-from typing import Union

 import numpy as np
 import zarr
@@ -33,7 +28,7 @@ if TYPE_CHECKING:
 LOG = logging.getLogger(__name__)


-def load_config() -> Dict[str, Any]:
+def load_config() -> dict[str, Any]:
     """Load the configuration settings.

     Returns
@@ -110,10 +105,10 @@ def round_datetime(d: np.datetime64, dates: NDArray[np.datetime64], up: bool) ->


 def _as_date(
-    d: Union[int, str, np.datetime64, datetime.date],
+    d: int | str | np.datetime64 | datetime.date,
     dates: NDArray[np.datetime64],
     last: bool,
-    frequency: Optional[datetime.timedelta] = None,
+    frequency: datetime.timedelta | None = None,
 ) -> np.datetime64:
     """Convert a date to a numpy datetime64 object, rounding to the nearest date in a list of dates.

@@ -221,8 +216,8 @@ def _as_date(

     if "-" in d and ":" in d:
         date, time = d.replace(" ", "T").split("T")
-        year, month, day = [int(_) for _ in date.split("-")]
-        hour, minute, second = [int(_) for _ in time.split(":")]
+        year, month, day = (int(_) for _ in date.split("-"))
+        hour, minute, second = (int(_) for _ in time.split(":"))
         return _as_date(
             np.datetime64(f"{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}"),
             dates,
@@ -258,9 +253,9 @@ def _as_date(


 def as_first_date(
-    d: Union[int, str, np.datetime64, datetime.date],
+    d: int | str | np.datetime64 | datetime.date,
     dates: NDArray[np.datetime64],
-    frequency: Optional[datetime.timedelta] = None,
+    frequency: datetime.timedelta | None = None,
 ) -> np.datetime64:
     """Convert a date to the first date in a list of dates.

@@ -282,9 +277,9 @@ def as_first_date(


 def as_last_date(
-    d: Union[int, str, np.datetime64, datetime.date],
+    d: int | str | np.datetime64 | datetime.date,
     dates: NDArray[np.datetime64],
-    frequency: Optional[datetime.timedelta] = None,
+    frequency: datetime.timedelta | None = None,
 ) -> np.datetime64:
     """Convert a date to the last date in a list of dates.

@@ -305,7 +300,7 @@ def as_last_date(
     return _as_date(d, dates, last=True, frequency=frequency)


-def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple["Dataset", Dict[str, Any]]:
+def _concat_or_join(datasets: list["Dataset"], kwargs: dict[str, Any]) -> tuple["Dataset", dict[str, Any]]:
     """Concatenate or join datasets based on their date ranges.

     Parameters
@@ -317,7 +312,7 @@ def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple["Dataset", Dict[str, Any]]:

     Returns
     -------
-    Tuple[Dataset, Dict[str, Any]]
+    tuple[Dataset, Dict[str, Any]]
         The concatenated or joined dataset and remaining arguments.
     """
     if "adjust" in kwargs:
@@ -339,12 +334,12 @@ def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple["Dataset", Dict[str, Any]]:
         return Concat(datasets), kwargs


-def _open(a: Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]) -> "Dataset":
+def _open(a: str | PurePath | dict[str, Any] | list[Any] | tuple[Any, ...]) -> "Dataset":
     """Open a dataset from various input types.

     Parameters
     ----------
-    a : Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]
+    a : Union[str, PurePath, Dict[str, Any], List[Any], tuple[Any, ...]]
         The input to open.

     Returns
@@ -390,10 +385,10 @@ def _open(a: Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]) -> "Dataset":


 def _auto_adjust(
-    datasets: List["Dataset"],
-    kwargs: Dict[str, Any],
-    exclude: Optional[List[str]] = None,
-) -> Tuple[List["Dataset"], Dict[str, Any]]:
+    datasets: list["Dataset"],
+    kwargs: dict[str, Any],
+    exclude: list[str] | None = None,
+) -> tuple[list["Dataset"], dict[str, Any]]:
     """Automatically adjust datasets based on specified criteria.

     Parameters
@@ -407,7 +402,7 @@ def _auto_adjust(

     Returns
     -------
-    Tuple[List[Dataset], Dict[str, Any]]
+    tuple[List[Dataset], Dict[str, Any]]
         The adjusted datasets and remaining arguments.
     """
     if "adjust" not in kwargs:
@@ -620,7 +615,7 @@ def append_to_zarr(new_data: np.ndarray, new_dates: np.ndarray, zarr_path: str)
     # Re-open the zarr store to avoid root object accumulating memory.
     root = zarr.open(zarr_path, mode="a")
     # Convert new dates to strings (using str) regardless of input dtype.
-    new_dates = np.array(new_dates, dtype="datetime64[…]")
+    new_dates = np.array(new_dates, dtype="datetime64[s]")
     dates_ds = root["dates"]
     old_len = dates_ds.shape[0]
     dates_ds.resize((old_len + len(new_dates),))
@@ -633,19 +628,19 @@ def append_to_zarr(new_data: np.ndarray, new_dates: np.ndarray, zarr_path: str)
     data_ds[old_shape[0] :] = new_data


-def process_date(date: Any, big_dataset: Any) -> Tuple[np.ndarray, np.ndarray]:
+def process_date(date: Any, big_dataset: "Dataset") -> tuple[np.ndarray, np.ndarray]:
     """Open the subset corresponding to the given date and return (date, subset).

     Parameters
     ----------
     date : Any
         The date to process.
-    big_dataset : Any
+    big_dataset : Dataset
         The dataset to process.

     Returns
     -------
-    Tuple[np.ndarray, np.ndarray]
+    tuple[np.ndarray, np.ndarray]
         The subset and the date.
     """
     print("Processing:", date, flush=True)
@@ -655,26 +650,24 @@ def process_date(date: Any, big_dataset: Any) -> Tuple[np.ndarray, np.ndarray]:
     return s, date


-def initialize_zarr_store(root: Any, big_dataset: Any, recipe: Dict[str, Any]) -> None:
+def initialize_zarr_store(root: Any, big_dataset: "Dataset") -> None:
     """Initialize the Zarr store with the given dataset and recipe.

     Parameters
     ----------
     root : Any
-        The root …
-    big_dataset : Any
+        The root Zarr store.
+    big_dataset : Dataset
         The dataset to initialize the store with.
-    recipe : Dict[str, Any]
-        The recipe for initializing the store.
     """
-    ensembles = big_dataset.shape[…]
+    ensembles = big_dataset.shape[2]
     # Create or append to "dates" dataset.
     if "dates" not in root:
         full_length = len(big_dataset.dates)
         root.create_dataset("dates", data=np.array([], dtype="datetime64[s]"), chunks=(full_length,))

     if "data" not in root:
-        dims = (1, len(big_dataset.variables), ensembles, big_dataset.…)
+        dims = (1, len(big_dataset.variables), ensembles, big_dataset.shape[-1])
         root.create_dataset(
             "data",
             shape=dims,
@@ -694,25 +687,28 @@ def initialize_zarr_store(root: Any, big_dataset: Any, recipe: Dict[str, Any]) -> None:
     if "latitudes" not in root or "longitudes" not in root:
         root.create_dataset("latitudes", data=big_dataset.latitudes, compressor=None)
         root.create_dataset("longitudes", data=big_dataset.longitudes, compressor=None)
-
+    for k, v in big_dataset.metadata().items():
+        if k not in root.attrs:
+            root.attrs[k] = v
     # Set store-wide attributes if not already set.
-    if "…" not in root.attrs:
-        root.attrs["…"] = …
-        root.attrs["…"] = …
+    if "first_date" not in root.attrs:
+        root.attrs["first_date"] = big_dataset.metadata()["start_date"]
+        root.attrs["last_date"] = big_dataset.metadata()["end_date"]
+        root.attrs["resolution"] = big_dataset.resolution
         root.attrs["name_to_index"] = {k: i for i, k in enumerate(big_dataset.variables)}
-        root.attrs["ensemble_dimension"] = …
+        root.attrs["ensemble_dimension"] = 2
         root.attrs["field_shape"] = big_dataset.field_shape
         root.attrs["flatten_grid"] = True
-        root.attrs["recipe"] = …
+        root.attrs["recipe"] = {}


-def _save_dataset(recipe: Dict[str, Any], zarr_path: str, n_workers: int = 1) -> None:
+def _save_dataset(dataset: "Dataset", zarr_path: str, n_workers: int = 1) -> None:
     """Incrementally create (or update) a Zarr store from an Anemoi dataset.

     Parameters
     ----------
-    recipe : Dict[str, Any]
-        …
+    dataset : Dataset
+        anemoi-dataset opened from python to save to Zarr store
     zarr_path : str
         The path to the Zarr store.
     n_workers : int, optional
@@ -728,13 +724,13 @@ def _save_dataset(recipe: Dict[str, Any], zarr_path: str, n_workers: int = 1) -> None:
     """
     from concurrent.futures import ProcessPoolExecutor

-    full_ds = …
+    full_ds = dataset
     print("Opened full dataset.", flush=True)

     # Use ProcessPoolExecutor for parallel data extraction.
     # Workers return (date, subset) tuples.
     root = zarr.open(zarr_path, mode="a")
-    initialize_zarr_store(root, full_ds, recipe)
+    initialize_zarr_store(root, full_ds)
     print("Zarr store initialised.", flush=True)

     existing_dates = np.array(sorted(root["dates"]), dtype="datetime64[s]")
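The `append_to_zarr` hunk shows the standard grow-then-assign pattern for appending along the first axis of a Zarr array: resize the array, then write into the newly exposed region. A minimal self-contained sketch of the same pattern (zarr v2 API; the store path and shapes are illustrative):

    import numpy as np
    import zarr

    root = zarr.open("store.zarr", mode="a")
    if "data" not in root:
        root.create_dataset("data", shape=(0, 3), chunks=(100, 3), dtype="f4")

    new_data = np.random.rand(10, 3).astype("f4")
    data = root["data"]
    old_len = data.shape[0]
    data.resize((old_len + len(new_data), 3))  # grow along the dates axis
    data[old_len:] = new_data                  # write only the appended block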
anemoi/datasets/data/missing.py
CHANGED
@@ -12,11 +12,6 @@ import datetime
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Set
-from typing import Tuple
-from typing import Union

 import numpy as np
 from numpy.typing import NDArray
@@ -49,7 +44,7 @@ class MissingDates(Forwards):
         List of missing dates.
     """

-    def __init__(self, dataset: Dataset, missing_dates: List[Union[int, str]]) -> None:
+    def __init__(self, dataset: Dataset, missing_dates: list[int | str]) -> None:
         """Initializes the MissingDates class.

         Parameters
@@ -80,13 +75,13 @@ class MissingDates(Forwards):
                 self.missing_dates.append(date)

         n = self.forward._len
-        self._missing = set(i for i in self._missing if 0 <= i < n)
+        self._missing = {i for i in self._missing if 0 <= i < n}
         self.missing_dates = sorted(to_datetime(x) for x in self.missing_dates)

         assert len(self._missing), "No dates to force missing"

     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Returns the set of missing indices."""
         return self._missing.union(self.forward.missing)

@@ -148,7 +143,7 @@ class MissingDates(Forwards):
             raise MissingDateError(f"Date {self.forward.dates[n]} is missing (index={n})")

     @property
-    def reason(self) -> Dict[str, Any]:
+    def reason(self) -> dict[str, Any]:
         """Provides the reason for missing dates."""
         return {"missing_dates": self.missing_dates}

@@ -162,7 +157,7 @@ class MissingDates(Forwards):
         """
         return Node(self, [self.forward.tree()], **self.reason)

-    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Provides metadata specific to the subclass.

         Returns
@@ -184,7 +179,7 @@ class SkipMissingDates(Forwards):
         The expected access pattern.
     """

-    def __init__(self, dataset: Dataset, expected_access: Union[int, slice]) -> None:
+    def __init__(self, dataset: Dataset, expected_access: int | slice) -> None:
         """Initializes the SkipMissingDates class.

         Parameters
@@ -285,7 +280,7 @@ class SkipMissingDates(Forwards):
         return tuple(np.stack(_) for _ in result)

     @debug_indexing
-    def _get_slice(self, s: slice) -> Tuple[NDArray[Any], ...]:
+    def _get_slice(self, s: slice) -> tuple[NDArray[Any], ...]:
         """Retrieves a slice of items.

         Parameters
@@ -303,7 +298,7 @@ class SkipMissingDates(Forwards):
         return tuple(np.stack(_) for _ in result)

     @debug_indexing
-    def __getitem__(self, n: FullIndex) -> Tuple[NDArray[Any], ...]:
+    def __getitem__(self, n: FullIndex) -> tuple[NDArray[Any], ...]:
         """Retrieves the item at the given index.

         Parameters
@@ -339,7 +334,7 @@ class SkipMissingDates(Forwards):
         """
         return Node(self, [self.forward.tree()], expected_access=self.expected_access)

-    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Provides metadata specific to the subclass.

         Returns
@@ -404,7 +399,7 @@ class MissingDataset(Forwards):
         return self._dates

     @property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Returns the set of missing indices."""
         return self._missing

@@ -436,7 +431,7 @@ class MissingDataset(Forwards):
         """
         return Node(self, [self.forward.tree()], start=self.start, end=self.end)

-    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Provides metadata specific to the subclass.

         Returns