PyPI - anemoi-datasets - Versions diffs - 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl - Mend

anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (126) hide show

anemoi/datasets/__init__.py +1 -2
anemoi/datasets/_version.py +16 -3
anemoi/datasets/commands/check.py +1 -1
anemoi/datasets/commands/copy.py +1 -2
anemoi/datasets/commands/create.py +1 -1
anemoi/datasets/commands/grib-index.py +1 -1
anemoi/datasets/commands/inspect.py +27 -35
anemoi/datasets/commands/validate.py +59 -0
anemoi/datasets/compute/recentre.py +3 -6
anemoi/datasets/create/__init__.py +22 -25
anemoi/datasets/create/check.py +10 -12
anemoi/datasets/create/chunks.py +1 -2
anemoi/datasets/create/config.py +3 -6
anemoi/datasets/create/filter.py +21 -24
anemoi/datasets/create/input/__init__.py +1 -2
anemoi/datasets/create/input/action.py +3 -5
anemoi/datasets/create/input/concat.py +5 -8
anemoi/datasets/create/input/context.py +3 -6
anemoi/datasets/create/input/data_sources.py +5 -8
anemoi/datasets/create/input/empty.py +1 -2
anemoi/datasets/create/input/filter.py +2 -3
anemoi/datasets/create/input/function.py +1 -2
anemoi/datasets/create/input/join.py +4 -5
anemoi/datasets/create/input/misc.py +4 -6
anemoi/datasets/create/input/repeated_dates.py +13 -18
anemoi/datasets/create/input/result.py +29 -33
anemoi/datasets/create/input/step.py +6 -24
anemoi/datasets/create/input/template.py +3 -4
anemoi/datasets/create/input/trace.py +1 -1
anemoi/datasets/create/patch.py +1 -2
anemoi/datasets/create/persistent.py +3 -5
anemoi/datasets/create/size.py +1 -3
anemoi/datasets/create/sources/accumulations.py +47 -52
anemoi/datasets/create/sources/accumulations2.py +4 -8
anemoi/datasets/create/sources/constants.py +1 -3
anemoi/datasets/create/sources/empty.py +1 -2
anemoi/datasets/create/sources/fdb.py +133 -0
anemoi/datasets/create/sources/forcings.py +1 -2
anemoi/datasets/create/sources/grib.py +6 -10
anemoi/datasets/create/sources/grib_index.py +13 -15
anemoi/datasets/create/sources/hindcasts.py +2 -5
anemoi/datasets/create/sources/legacy.py +1 -1
anemoi/datasets/create/sources/mars.py +17 -21
anemoi/datasets/create/sources/netcdf.py +1 -2
anemoi/datasets/create/sources/opendap.py +1 -3
anemoi/datasets/create/sources/patterns.py +4 -6
anemoi/datasets/create/sources/planetary_computer.py +44 -0
anemoi/datasets/create/sources/recentre.py +8 -11
anemoi/datasets/create/sources/source.py +3 -6
anemoi/datasets/create/sources/tendencies.py +2 -5
anemoi/datasets/create/sources/xarray.py +4 -6
anemoi/datasets/create/sources/xarray_support/__init__.py +15 -32
anemoi/datasets/create/sources/xarray_support/coordinates.py +16 -12
anemoi/datasets/create/sources/xarray_support/field.py +17 -16
anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
anemoi/datasets/create/sources/xarray_support/flavour.py +83 -45
anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
anemoi/datasets/create/sources/xarray_support/patch.py +47 -6
anemoi/datasets/create/sources/xarray_support/time.py +10 -13
anemoi/datasets/create/sources/xarray_support/variable.py +27 -23
anemoi/datasets/create/sources/xarray_zarr.py +1 -2
anemoi/datasets/create/sources/zenodo.py +3 -5
anemoi/datasets/create/statistics/__init__.py +3 -6
anemoi/datasets/create/testing.py +2 -74
anemoi/datasets/create/typing.py +1 -2
anemoi/datasets/create/utils.py +1 -2
anemoi/datasets/create/zarr.py +7 -2
anemoi/datasets/data/__init__.py +15 -6
anemoi/datasets/data/complement.py +52 -23
anemoi/datasets/data/concat.py +5 -8
anemoi/datasets/data/dataset.py +42 -47
anemoi/datasets/data/debug.py +7 -9
anemoi/datasets/data/ensemble.py +4 -6
anemoi/datasets/data/fill_missing.py +7 -10
anemoi/datasets/data/forwards.py +30 -28
anemoi/datasets/data/grids.py +12 -16
anemoi/datasets/data/indexing.py +9 -12
anemoi/datasets/data/interpolate.py +7 -15
anemoi/datasets/data/join.py +8 -12
anemoi/datasets/data/masked.py +6 -11
anemoi/datasets/data/merge.py +5 -9
anemoi/datasets/data/misc.py +41 -45
anemoi/datasets/data/missing.py +11 -16
anemoi/datasets/data/observations/__init__.py +8 -14
anemoi/datasets/data/padded.py +3 -5
anemoi/datasets/data/records/backends/__init__.py +2 -2
anemoi/datasets/data/rescale.py +5 -12
anemoi/datasets/data/select.py +13 -16
anemoi/datasets/data/statistics.py +4 -7
anemoi/datasets/data/stores.py +23 -77
anemoi/datasets/data/subset.py +8 -11
anemoi/datasets/data/unchecked.py +7 -11
anemoi/datasets/data/xy.py +25 -21
anemoi/datasets/dates/__init__.py +13 -18
anemoi/datasets/dates/groups.py +7 -10
anemoi/datasets/grids.py +11 -12
anemoi/datasets/testing.py +93 -7
anemoi/datasets/validate.py +598 -0
{anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +5 -4
anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
anemoi/datasets/create/filters/__init__.py +0 -33
anemoi/datasets/create/filters/empty.py +0 -37
anemoi/datasets/create/filters/legacy.py +0 -93
anemoi/datasets/create/filters/noop.py +0 -37
anemoi/datasets/create/filters/orog_to_z.py +0 -58
anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
anemoi/datasets/create/filters/rename.py +0 -205
anemoi/datasets/create/filters/rotate_winds.py +0 -105
anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
anemoi/datasets/create/filters/sum.py +0 -68
anemoi/datasets/create/filters/transform.py +0 -51
anemoi/datasets/create/filters/unrotate_winds.py +0 -105
anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
anemoi/datasets/create/filters/wz_to_w.py +0 -98
anemoi/datasets/utils/__init__.py +0 -8
anemoi_datasets-0.5.25.dist-info/RECORD +0 -150
{anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
{anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
{anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
{anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0

anemoi/datasets/__init__.py CHANGED Viewed

@@ -7,7 +7,6 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
-from typing import List
 from .data import MissingDateError
 from .data import add_dataset_path
@@ -23,7 +22,7 @@ except ImportError:  # pragma: no cover
     # Local copy or not installed with setuptools
     __version__ = "999"
-__all__: List[str] = [
+__all__: list[str] = [
     "add_dataset_path",
     "add_named_dataset",
     "list_dataset_names",

anemoi/datasets/_version.py CHANGED Viewed

@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control
-__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union
     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object
 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
-__version__ = version = '0.5.25'
-__version_tuple__ = version_tuple = (0, 5, 25)
+__version__ = version = '0.5.27'
+__version_tuple__ = version_tuple = (0, 5, 27)
+__commit_id__ = commit_id = None

anemoi/datasets/commands/check.py CHANGED Viewed

@@ -77,7 +77,7 @@ class Check(Command):
         recipe_filename = os.path.basename(recipe)
         recipe_name = os.path.splitext(recipe_filename)[0]
-        in_recipe_name = yaml.safe_load(open(recipe, "r", encoding="utf-8"))["name"]
+        in_recipe_name = yaml.safe_load(open(recipe, encoding="utf-8"))["name"]
         if recipe_name != in_recipe_name:
             print(f"Recipe name {recipe_name} does not match the name in the recipe file {in_recipe_name}")

anemoi/datasets/commands/copy.py CHANGED Viewed

@@ -14,7 +14,6 @@ import sys
 from concurrent.futures import ThreadPoolExecutor
 from concurrent.futures import as_completed
 from typing import Any
-from typing import Optional
 import tqdm
 from anemoi.utils.remote import Transfer
@@ -136,7 +135,7 @@ class ZarrCopier:
             return zarr.storage.NestedDirectoryStore(path)
         return path
-    def copy_chunk(self, n: int, m: int, source: Any, target: Any, _copy: Any, verbosity: int) -> Optional[slice]:
+    def copy_chunk(self, n: int, m: int, source: Any, target: Any, _copy: Any, verbosity: int) -> slice | None:
         """Copy a chunk of data from source to target.
         Parameters

anemoi/datasets/commands/create.py CHANGED Viewed

@@ -120,7 +120,7 @@ class Create(Command):
         task("finalise", options)
         task("init_additions", options)
-        task("run_additions", options)
+        task("load_additions", options)
         task("finalise_additions", options)
         task("patch", options)

anemoi/datasets/commands/grib-index.py CHANGED Viewed

@@ -81,7 +81,7 @@ class GribIndexCmd(Command):
             bool
                 True if the path matches, False otherwise.
             """
-            return fnmatch.fnmatch(path, args.match)
+            return fnmatch.fnmatch(os.path.basename(path), args.match)
         from anemoi.datasets.create.sources.grib_index import GribIndex

anemoi/datasets/commands/inspect.py CHANGED Viewed

@@ -14,10 +14,6 @@ import os
 from copy import deepcopy
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Union
 import numpy as np
 import semantic_version
@@ -39,7 +35,7 @@ from . import Command
 LOG = logging.getLogger(__name__)
-def compute_directory_size(path: str) -> Union[tuple[int, int], tuple[None, None]]:
+def compute_directory_size(path: str) -> tuple[int, int] | tuple[None, None]:
     """Compute the total size and number of files in a directory.
     Parameters
@@ -104,7 +100,7 @@ def cos_local_time_bug(lon: float, date: datetime.datetime) -> float:
     return np.cos(radians)
-def find(config: Union[dict, list], name: str) -> Any:
+def find(config: dict | list, name: str) -> Any:
     """Recursively search for a key in a nested dictionary or list.
     Parameters
@@ -167,7 +163,7 @@ class Version:
         print(f"🔢 Format version: {self.version}")
     @property
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
         """Get a mapping of variable names to their indices."""
         return find(self.metadata, "name_to_index")
@@ -208,30 +204,30 @@ class Version:
         return self.metadata["resolution"]
     @property
-    def field_shape(self) -> Optional[tuple]:
+    def field_shape(self) -> tuple | None:
         """Get the field shape of the dataset."""
         return self.metadata.get("field_shape")
     @property
-    def proj_string(self) -> Optional[str]:
+    def proj_string(self) -> str | None:
         """Get the projection string of the dataset."""
         return self.metadata.get("proj_string")
     @property
-    def shape(self) -> Optional[tuple]:
+    def shape(self) -> tuple | None:
         """Get the shape of the dataset."""
         if self.data and hasattr(self.data, "shape"):
             return self.data.shape
     @property
-    def n_missing_dates(self) -> Optional[int]:
+    def n_missing_dates(self) -> int | None:
         """Get the number of missing dates in the dataset."""
         if "missing_dates" in self.metadata:
             return len(self.metadata["missing_dates"])
         return None
     @property
-    def uncompressed_data_size(self) -> Optional[int]:
+    def uncompressed_data_size(self) -> int | None:
         """Get the uncompressed data size of the dataset."""
         if self.data and hasattr(self.data, "dtype") and hasattr(self.data, "size"):
             return self.data.dtype.itemsize * self.data.size
@@ -258,7 +254,7 @@ class Version:
         print()
         shape_str = "📐 Shape      : "
         if self.shape:
-            shape_str += " × ".join(["{:,}".format(s) for s in self.shape])
+            shape_str += " × ".join([f"{s:,}" for s in self.shape])
         if self.uncompressed_data_size:
             shape_str += f" ({bytes(self.uncompressed_data_size)})"
         print(shape_str)
@@ -293,17 +289,17 @@ class Version:
         print()
     @property
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
         """Get the list of variables in the dataset."""
         return [v[0] for v in sorted(self.name_to_index.items(), key=lambda x: x[1])]
     @property
-    def total_size(self) -> Optional[int]:
+    def total_size(self) -> int | None:
         """Get the total size of the dataset."""
         return self.zarr.attrs.get("total_size")
     @property
-    def total_number_of_files(self) -> Optional[int]:
+    def total_number_of_files(self) -> int | None:
         """Get the total number of files in the dataset."""
         return self.zarr.attrs.get("total_number_of_files")
@@ -348,7 +344,7 @@ class Version:
         return False
     @property
-    def statistics_started(self) -> Optional[datetime.datetime]:
+    def statistics_started(self) -> datetime.datetime | None:
         """Get the timestamp when statistics computation started."""
         for d in reversed(self.metadata.get("history", [])):
             if d["action"] == "compute_statistics_start":
@@ -356,12 +352,12 @@ class Version:
         return None
     @property
-    def build_flags(self) -> Optional[NDArray[Any]]:
+    def build_flags(self) -> NDArray[Any] | None:
         """Get the build flags of the dataset."""
         return self.zarr.get("_build_flags")
     @cached_property
-    def copy_flags(self) -> Optional[NDArray[Any]]:
+    def copy_flags(self) -> NDArray[Any] | None:
         """Get the copy flags of the dataset."""
         if "_copy" not in self.zarr:
             return None
@@ -381,7 +377,7 @@ class Version:
         return not all(self.copy_flags)
     @property
-    def build_lengths(self) -> Optional[NDArray]:
+    def build_lengths(self) -> NDArray | None:
         """Get the build lengths of the dataset."""
         return self.zarr.get("_build_lengths")
@@ -396,17 +392,13 @@ class Version:
             print(
                 "📈 Progress:",
                 progress(built, total, width=50),
-                "{:.0f}%".format(built / total * 100),
+                f"{built / total * 100:.0f}%",
             )
             return
-        if self.build_flags is None:
-            print("🪫 Dataset not initialised")
-            return
-        build_flags = self.build_flags
+        build_flags = self.build_flags or np.array([], dtype=bool)
-        build_lengths = self.build_lengths
+        build_lengths = self.build_lengths or np.array([], dtype=bool)
         assert build_flags.size == build_lengths.size
         latest_write_timestamp = self.zarr.attrs.get("latest_write_timestamp")
@@ -422,7 +414,7 @@ class Version:
             print(
                 "📈 Progress:",
                 progress(built, total, width=50),
-                "{:.0f}%".format(built / total * 100),
+                f"{built / total * 100:.0f}%",
             )
             start = self.initialised
             if self.initialised:
@@ -623,7 +615,7 @@ class Version0_6(Version):
     """Represents version 0.6 of a dataset."""
     @property
-    def initialised(self) -> Optional[datetime.datetime]:
+    def initialised(self) -> datetime.datetime | None:
         """Get the initialization timestamp of the dataset."""
         for record in self.metadata.get("history", []):
             if record["action"] == "initialised":
@@ -659,12 +651,12 @@ class Version0_6(Version):
         return all(build_flags)
     @property
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
         """Get a mapping of variable names to their indices."""
         return {n: i for i, n in enumerate(self.metadata["variables"])}
     @property
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
         """Get the list of variables in the dataset."""
         return self.metadata["variables"]
@@ -706,7 +698,7 @@ class Version0_13(Version0_12):
     """Represents version 0.13 of a dataset."""
     @property
-    def build_flags(self) -> Optional[NDArray]:
+    def build_flags(self) -> NDArray | None:
         """Get the build flags for the dataset."""
         if "_build" not in self.zarr:
             return None
@@ -714,7 +706,7 @@ class Version0_13(Version0_12):
         return build.get("flags")
     @property
-    def build_lengths(self) -> Optional[NDArray]:
+    def build_lengths(self) -> NDArray | None:
         """Get the build lengths for the dataset."""
         if "_build" not in self.zarr:
             return None
@@ -792,10 +784,10 @@ class InspectZarr(Command):
         try:
             if progress:
-                return version.progress()
+                version.progress()
             if statistics:
-                return version.brute_force_statistics()
+                version.brute_force_statistics()
             version.info(detailed, size)

anemoi/datasets/commands/validate.py ADDED Viewed

@@ -0,0 +1,59 @@
+# (C) Copyright 2025 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+import importlib
+import logging
+from typing import Any
+from anemoi.datasets.validate import validate_dataset
+from . import Command
+LOG = logging.getLogger(__name__)
+DEFAULT_DATASET = "aifs-ea-an-oper-0001-mars-o96-1979-2023-6h-v8"
+class Validate(Command):
+    """Command to validate an anemoi dataset."""
+    def add_arguments(self, command_parser: Any) -> None:
+        """Add arguments to the command parser.
+        Parameters
+        ----------
+        command_parser : Any
+            The command parser.
+        """
+        command_parser.add_argument("--callable", metavar="DATASET", default="anemoi.datasets.open_dataset")
+        command_parser.add_argument("--costly-checks", action="store_true", help="Run costly checks")
+        command_parser.add_argument("--detailed", action="store_true", help="Give detailed report")
+        command_parser.add_argument("path", metavar="DATASET")
+    def run(self, args: Any) -> None:
+        """Run the command.
+        Parameters
+        ----------
+        args : Any
+            The command arguments.
+        """
+        module_path, func_name = args.callable.rsplit(".", 1)
+        module = importlib.import_module(module_path)
+        callable_func = getattr(module, func_name)
+        if args.path == "default":
+            args.path = DEFAULT_DATASET
+        dataset = callable_func(args.path)
+        validate_dataset(dataset, costly_checks=args.costly_checks, detailed=args.detailed)
+command = Validate

anemoi/datasets/compute/recentre.py CHANGED Viewed

@@ -10,9 +10,6 @@
 import logging
 from typing import Any
-from typing import Dict
-from typing import Optional
-from typing import Tuple
 import numpy as np
 from earthkit.data.core.temporary import temp_file
@@ -36,7 +33,7 @@ SKIP = ("class", "stream", "type", "number", "expver", "_leg_number", "anoffset"
 def check_compatible(
-    f1: Any, f2: Any, centre_field_as_mars: Dict[str, Any], ensemble_field_as_mars: Dict[str, Any]
+    f1: Any, f2: Any, centre_field_as_mars: dict[str, Any], ensemble_field_as_mars: dict[str, Any]
 ) -> None:
     """Check if two fields are compatible.
@@ -75,9 +72,9 @@ def recentre(
     *,
     members: Any,
     centre: Any,
-    clip_variables: Tuple[str, ...] = CLIP_VARIABLES,
+    clip_variables: tuple[str, ...] = CLIP_VARIABLES,
     alpha: float = 1.0,
-    output: Optional[str] = None,
+    output: str | None = None,
 ) -> Any:
     """Recentre ensemble members around the centre field.

anemoi/datasets/create/__init__.py CHANGED Viewed

@@ -16,8 +16,6 @@ import uuid
 import warnings
 from functools import cached_property
 from typing import Any
-from typing import Optional
-from typing import Union
 import cftime
 import numpy as np
@@ -102,8 +100,8 @@ def json_tidy(o: Any) -> Any:
 def build_statistics_dates(
     dates: list[datetime.datetime],
-    start: Optional[datetime.datetime],
-    end: Optional[datetime.datetime],
+    start: datetime.datetime | None,
+    end: datetime.datetime | None,
 ) -> tuple[str, str]:
     """Compute the start and end dates for the statistics.
@@ -359,7 +357,7 @@ class Actor:  # TODO: rename to Creator
     dataset_class = WritableDataset
-    def __init__(self, path: str, cache: Optional[str] = None):
+    def __init__(self, path: str, cache: str | None = None):
         """Initialize an Actor instance.
         Parameters
@@ -577,10 +575,10 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         check_name: bool = False,
         overwrite: bool = False,
         use_threads: bool = False,
-        statistics_temp_dir: Optional[str] = None,
+        statistics_temp_dir: str | None = None,
         progress: Any = None,
         test: bool = False,
-        cache: Optional[str] = None,
+        cache: str | None = None,
         **kwargs: Any,
     ):
         """Initialize an Init instance.
@@ -809,11 +807,11 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
     def __init__(
         self,
         path: str,
-        parts: Optional[str] = None,
+        parts: str | None = None,
         use_threads: bool = False,
-        statistics_temp_dir: Optional[str] = None,
+        statistics_temp_dir: str | None = None,
         progress: Any = None,
-        cache: Optional[str] = None,
+        cache: str | None = None,
         **kwargs: Any,
     ):
         """Initialize a Load instance.
@@ -907,8 +905,8 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
                 print("Requested dates", compress_dates(dates))
                 print("Cube dates", compress_dates(dates_in_data))
-                a = set(as_datetime(_) for _ in dates)
-                b = set(as_datetime(_) for _ in dates_in_data)
+                a = {as_datetime(_) for _ in dates}
+                b = {as_datetime(_) for _ in dates_in_data}
                 print("Missing dates", compress_dates(a - b))
                 print("Extra dates", compress_dates(b - a))
@@ -958,7 +956,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         array.flush()
         LOG.info("Flushed data array")
-    def _get_allow_nans(self) -> Union[bool, list]:
+    def _get_allow_nans(self) -> bool | list:
         """Get the allow_nans configuration.
         Returns
@@ -991,7 +989,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         total = cube.count(reading_chunks)
         LOG.debug(f"Loading datacube: {cube}")
-        def position(x: Any) -> Optional[int]:
+        def position(x: Any) -> int | None:
             if isinstance(x, str) and "/" in x:
                 x = x.split("/")
                 return int(x[0])
@@ -1038,7 +1036,7 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
     def __init__(
         self,
         path: str,
-        statistics_temp_dir: Optional[str] = None,
+        statistics_temp_dir: str | None = None,
         delta: list = [],
         use_threads: bool = False,
         **kwargs: Any,
@@ -1217,19 +1215,19 @@ class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         LOG.info(f"Cleaned temporary storage {self.tmp_storage_path}")
-class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
+class _LoadAdditions(Actor, HasRegistryMixin, AdditionsMixin):
     """A class to run dataset additions."""
     def __init__(
         self,
         path: str,
         delta: str,
-        parts: Optional[str] = None,
+        parts: str | None = None,
         use_threads: bool = False,
         progress: Any = None,
         **kwargs: Any,
     ):
-        """Initialize a _RunAdditions instance.
+        """Initialize a _LoadAdditions instance.
         Parameters
         ----------
@@ -1469,7 +1467,7 @@ def multi_addition(cls: type) -> type:
 InitAdditions = multi_addition(_InitAdditions)
-RunAdditions = multi_addition(_RunAdditions)
+LoadAdditions = multi_addition(_LoadAdditions)
 FinaliseAdditions = multi_addition(_FinaliseAdditions)
@@ -1480,7 +1478,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         self,
         path: str,
         use_threads: bool = False,
-        statistics_temp_dir: Optional[str] = None,
+        statistics_temp_dir: str | None = None,
         progress: Any = None,
         **kwargs: Any,
     ):
@@ -1539,7 +1537,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         LOG.info(f"Wrote statistics in {self.path}")
     @cached_property
-    def allow_nans(self) -> Union[bool, list]:
+    def allow_nans(self) -> bool | list:
         """Check if NaNs are allowed."""
         import zarr
@@ -1581,7 +1579,7 @@ def chain(tasks: list) -> type:
     return Chain
-def creator_factory(name: str, trace: Optional[str] = None, **kwargs: Any) -> Any:
+def creator_factory(name: str, trace: str | None = None, **kwargs: Any) -> Any:
     """Create a dataset creator.
     Parameters
@@ -1612,10 +1610,9 @@ def creator_factory(name: str, trace: Optional[str] = None, **kwargs: Any) -> An
         cleanup=Cleanup,
         verify=Verify,
         init_additions=InitAdditions,
-        load_additions=RunAdditions,
-        run_additions=RunAdditions,
+        load_additions=LoadAdditions,
         finalise_additions=chain([FinaliseAdditions, Size]),
-        additions=chain([InitAdditions, RunAdditions, FinaliseAdditions, Size, Cleanup]),
+        additions=chain([InitAdditions, LoadAdditions, FinaliseAdditions, Size, Cleanup]),
     )[name]
     LOG.debug(f"Creating {cls.__name__} with {kwargs}")
     return cls(**kwargs)

anemoi/datasets/create/check.py CHANGED Viewed

@@ -12,10 +12,8 @@ import datetime
 import logging
 import re
 import warnings
+from collections.abc import Callable
 from typing import Any
-from typing import Callable
-from typing import Optional
-from typing import Union
 import numpy as np
 from anemoi.utils.config import load_config
@@ -31,10 +29,10 @@ class DatasetName:
     def __init__(
         self,
         name: str,
-        resolution: Optional[str] = None,
-        start_date: Optional[datetime.date] = None,
-        end_date: Optional[datetime.date] = None,
-        frequency: Optional[datetime.timedelta] = None,
+        resolution: str | None = None,
+        start_date: datetime.date | None = None,
+        end_date: datetime.date | None = None,
+        frequency: datetime.timedelta | None = None,
     ):
         """Initialize a DatasetName instance.
@@ -146,7 +144,7 @@ class DatasetName:
                 "https://anemoi-registry.readthedocs.io/en/latest/naming-conventions.html"
             )
-    def check_resolution(self, resolution: Optional[str]) -> None:
+    def check_resolution(self, resolution: str | None) -> None:
         """Check if the resolution matches the expected format.
         Parameters
@@ -175,7 +173,7 @@ class DatasetName:
             if not c.isalnum() and c not in "-":
                 self.messages.append(f"the {self.name} should only contain alphanumeric characters and '-'.")
-    def check_frequency(self, frequency: Optional[datetime.timedelta]) -> None:
+    def check_frequency(self, frequency: datetime.timedelta | None) -> None:
         """Check if the frequency matches the expected format.
         Parameters
@@ -189,7 +187,7 @@ class DatasetName:
         self._check_missing("frequency", frequency_str)
         self._check_mismatch("frequency", frequency_str)
-    def check_start_date(self, start_date: Optional[datetime.date]) -> None:
+    def check_start_date(self, start_date: datetime.date | None) -> None:
         """Check if the start date matches the expected format.
         Parameters
@@ -203,7 +201,7 @@ class DatasetName:
         self._check_missing("start_date", start_date_str)
         self._check_mismatch("start_date", start_date_str)
-    def check_end_date(self, end_date: Optional[datetime.date]) -> None:
+    def check_end_date(self, end_date: datetime.date | None) -> None:
         """Check if the end date matches the expected format.
         Parameters
@@ -251,7 +249,7 @@ class StatisticsValueError(ValueError):
 def check_data_values(
-    arr: NDArray[Any], *, name: str, log: list = [], allow_nans: Union[bool, list, set, tuple, dict] = False
+    arr: NDArray[Any], *, name: str, log: list = [], allow_nans: bool | list | set | tuple | dict = False
 ) -> None:
     """Check the values in the data array for validity.

anemoi/datasets/create/chunks.py CHANGED Viewed

@@ -9,7 +9,6 @@
 import logging
 import warnings
-from typing import Union
 LOG = logging.getLogger(__name__)
@@ -27,7 +26,7 @@ class ChunkFilter:
         The chunks that are allowed to be processed.
     """
-    def __init__(self, *, parts: Union[str, list], total: int):
+    def __init__(self, *, parts: str | list, total: int):
         """Initializes the ChunkFilter with the given parts and total number of chunks.
         Parameters

anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl

anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl