anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (116)
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/statistics/__init__.py +3 -6
@@ -17,9 +17,6 @@ import pickle
 import shutil
 import socket
 from typing import Any
-from typing import List
-from typing import Optional
-from typing import Union
 
 import numpy as np
 import tqdm
@@ -77,7 +74,7 @@ def default_statistics_dates(dates: list[datetime.datetime]) -> tuple[datetime.d
     return dates[0], end
 
 
-def to_datetime(date: Union[str, datetime.datetime]) -> np.datetime64:
+def to_datetime(date: str | datetime.datetime) -> np.datetime64:
     """Convert a date to numpy datetime64 format.
 
     Parameters
@@ -97,7 +94,7 @@ def to_datetime(date: Union[str, datetime.datetime]) -> np.datetime64:
     return date
 
 
-def to_datetimes(dates: list[Union[str, datetime.datetime]]) -> list[np.datetime64]:
+def to_datetimes(dates: list[str | datetime.datetime]) -> list[np.datetime64]:
     """Convert a list of dates to numpy datetime64 format.
 
     Parameters
@@ -221,7 +218,7 @@ def check_variance(
 
 
 def compute_statistics(
-    array: NDArray[Any], check_variables_names: Optional[List[str]] = None, allow_nans: bool = False
+    array: NDArray[Any], check_variables_names: list[str] | None = None, allow_nans: bool = False
 ) -> dict[str, np.ndarray]:
     """Compute statistics for a given array, provides minimum, maximum, sum, squares, count and has_nans as a dictionary.
 
anemoi/datasets/create/testing.py +4 -0
@@ -0,0 +1,4 @@
+class TestingContext:
+    """A context for testing plugins."""
+
+    pass
anemoi/datasets/create/typing.py +1 -2
@@ -8,8 +8,7 @@
 # nor does it submit to any jurisdiction.
 
 import datetime
-from typing import List
 
 Date = datetime.datetime
 
-DateList = List[Date]
+DateList = list[Date]
anemoi/datasets/create/utils.py +0 -43
@@ -13,7 +13,6 @@ import os
 import warnings
 from contextlib import contextmanager
 from typing import Any
-from typing import Union
 
 import numpy as np
 from earthkit.data import settings
@@ -97,48 +96,6 @@ def to_datetime(*args: Any, **kwargs: Any) -> datetime.datetime:
     return to_datetime_(*args, **kwargs)
 
 
-def make_list_int(value: Union[str, list, tuple, int]) -> list[int]:
-    """Convert a string, list, tuple, or integer to a list of integers.
-
-    Parameters
-    ----------
-    value : str or list or tuple or int
-        The value to convert.
-
-    Returns
-    -------
-    list[int]
-        A list of integers.
-
-    Raises
-    ------
-    ValueError
-        If the value cannot be converted to a list of integers.
-    """
-    # Convert a string like "1/2/3" or "1/to/3" or "1/to/10/by/2" to a list of integers.
-    # Moved to anemoi.utils.humanize
-    # replace with from anemoi.utils.humanize import make_list_int
-    # when anemoi-utils is released and pyproject.toml is updated
-    if isinstance(value, str):
-        if "/" not in value:
-            return [value]
-        bits = value.split("/")
-        if len(bits) == 3 and bits[1].lower() == "to":
-            value = list(range(int(bits[0]), int(bits[2]) + 1, 1))
-
-        elif len(bits) == 5 and bits[1].lower() == "to" and bits[3].lower() == "by":
-            value = list(range(int(bits[0]), int(bits[2]) + int(bits[4]), int(bits[4])))
-
-    if isinstance(value, list):
-        return value
-    if isinstance(value, tuple):
-        return value
-    if isinstance(value, int):
-        return [value]
-
-    raise ValueError(f"Cannot make list from {value}")
-
-
 def normalize_and_check_dates(
     dates: list[datetime.datetime],
     start: datetime.datetime,
anemoi/datasets/create/zarr.py +7 -2
@@ -11,7 +11,6 @@ import datetime
 import logging
 import shutil
 from typing import Any
-from typing import Optional
 
 import numpy as np
 import zarr
@@ -120,7 +119,7 @@ class ZarrBuiltRegistry:
     flags = None
     z = None
 
-    def __init__(self, path: str, synchronizer_path: Optional[str] = None, use_threads: bool = False):
+    def __init__(self, path: str, synchronizer_path: str | None = None, use_threads: bool = False):
        """Initialize the ZarrBuiltRegistry.
 
         Parameters
@@ -154,6 +153,12 @@ class ZarrBuiltRegistry:
         except FileNotFoundError:
             pass
 
+        _build = self.zarr_path + "/_build"
+        try:
+            shutil.rmtree(_build)
+        except FileNotFoundError:
+            pass
+
     def _open_write(self) -> zarr.Group:
         """Open the Zarr store in write mode."""
         import zarr
anemoi/datasets/data/__init__.py +15 -6
@@ -8,9 +8,9 @@
 # nor does it submit to any jurisdiction.
 
 import logging
+import os
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Set
 
 # from .dataset import FullIndex
 # from .dataset import Shape
@@ -82,6 +82,9 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
     Dataset
         The opened dataset.
     """
+
+    trace = int(os.environ.get("ANEMOI_DATASETS_TRACE", 0))
+
     # That will get rid of OmegaConf objects
 
     args, kwargs = _convert(args), _convert(kwargs)
@@ -90,22 +93,28 @@
     ds = ds.mutate()
     ds.arguments = {"args": args, "kwargs": kwargs}
     ds._check()
+
+    if trace:
+        from anemoi.datasets.testing import Trace
+
+        ds = Trace(ds)
+
     return ds
 
 
-def save_dataset(recipe: dict, zarr_path: str, n_workers: int = 1) -> None:
+def save_dataset(dataset: "Dataset", zarr_path: str, n_workers: int = 1) -> None:
     """Open a dataset and save it to disk.
 
     Parameters
     ----------
-    recipe : dict
-        Recipe used with open_dataset (not a dataset creation recipe).
+    dataset : Dataset
+        anemoi-dataset opened from python to save to Zarr store
     zarr_path : str
         Path to store the obtained anemoi dataset to disk.
     n_workers : int
         Number of workers to use for parallel processing. If none, sequential processing will be performed.
     """
-    _save_dataset(recipe, zarr_path, n_workers)
+    _save_dataset(dataset, zarr_path, n_workers)
 
 
 def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
@@ -124,6 +133,6 @@ def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
         The list of dataset names.
     """
     ds = _open_dataset(*args, **kwargs)
-    names: Set[str] = set()
+    names: set[str] = set()
     ds.get_dataset_names(names)
     return sorted(names)
anemoi/datasets/data/complement.py +7 -12
@@ -12,11 +12,6 @@ import logging
 from abc import abstractmethod
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set
-from typing import Tuple
 
 import numpy as np
 from numpy.typing import NDArray
@@ -91,26 +86,26 @@ class Complement(Combined):
             raise ValueError("Augment: no missing variables")
 
     @property
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
         """Returns the list of variables to be added to the target dataset."""
         return self._variables
 
     @property
-    def statistics(self) -> Dict[str, NDArray[Any]]:
+    def statistics(self) -> dict[str, NDArray[Any]]:
         datasets = [self._source, self._target]
         return {
             k: [d.statistics[k][d.name_to_index[i]] for d in datasets for i in d.variables if i in self.variables]
             for k in datasets[0].statistics
         }
 
-    def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         index = [self._source.name_to_index[v] for v in self._variables]
         if delta is None:
             delta = self.frequency
         return {k: v[index] for k, v in self._source.statistics_tendencies(delta).items()}
 
     @property
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
         """Returns a dictionary mapping variable names to their indices."""
         return {v: i for i, v in enumerate(self.variables)}
 
@@ -121,7 +116,7 @@ class Complement(Combined):
         return (shape[0], len(self._variables)) + shape[2:]
 
     @property
-    def variables_metadata(self) -> Dict[str, Any]:
+    def variables_metadata(self) -> dict[str, Any]:
         """Returns the metadata of the variables to be added to the target dataset."""
         # Merge the two dicts first
         all_meta = {**self._source.variables_metadata, **self._target.variables_metadata}
@@ -142,7 +137,7 @@ class Complement(Combined):
         pass
 
     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Returns the set of missing indices in the source and target datasets."""
         missing = self._source.missing.copy()
         missing = missing | self._target.missing
@@ -317,7 +312,7 @@ class ComplementNearest(Complement):
         return apply_index_to_slices_changes(result, changes)
 
 
-def complement_factory(args: Tuple, kwargs: dict) -> Dataset:
+def complement_factory(args: tuple, kwargs: dict) -> Dataset:
     """Factory function to create a Complement instance based on the provided arguments.
 
     Parameters
anemoi/datasets/data/concat.py +5 -8
@@ -11,9 +11,6 @@
 import logging
 from functools import cached_property
 from typing import Any
-from typing import List
-from typing import Set
-from typing import Tuple
 
 import numpy as np
 from anemoi.utils.dates import frequency_to_timedelta
@@ -123,12 +120,12 @@ class ConcatMixin:
         return np.concatenate(result)
 
     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Returns the set of missing indices in the concatenated datasets."""
-        result: Set[int] = set()
+        result: set[int] = set()
         offset = 0
         for d in self.datasets:
-            result = result | set(m + offset for m in d.missing)
+            result = result | {m + offset for m in d.missing}
             offset += len(d)
         return result
 
@@ -195,7 +192,7 @@ class Concat(ConcatMixin, Combined):
         return Node(self, [d.tree() for d in self.datasets])
 
     @classmethod
-    def check_dataset_compatibility(cls, datasets: List[Any], fill_missing_gaps: bool = False) -> List[Any]:
+    def check_dataset_compatibility(cls, datasets: list[Any], fill_missing_gaps: bool = False) -> list[Any]:
         """Checks the compatibility of the datasets for concatenation and fills missing gaps if required.
 
         Parameters
@@ -259,7 +256,7 @@ class Concat(ConcatMixin, Combined):
         return {}
 
 
-def concat_factory(args: Tuple[Any, ...], kwargs: dict) -> Concat:
+def concat_factory(args: tuple[Any, ...], kwargs: dict) -> Concat:
     """Factory function to create a Concat object.
 
     Parameters
anemoi/datasets/data/dataset.py +48 -47
@@ -22,15 +22,10 @@ try:
 except ImportError:
     # Python 3.9
     EllipsisType = type(Ellipsis)
+from collections.abc import Sequence
+from collections.abc import Sized
 from typing import TYPE_CHECKING
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Sequence
-from typing import Set
-from typing import Sized
-from typing import Tuple
 from typing import Union
 
 import numpy as np
@@ -48,8 +43,8 @@ if TYPE_CHECKING:
 LOG = logging.getLogger(__name__)
 
 
-Shape = Tuple[int, ...]
-TupleIndex = Tuple[Union[int, slice, EllipsisType], ...]
+Shape = tuple[int, ...]
+TupleIndex = tuple[Union[int, slice, EllipsisType], ...]
 FullIndex = Union[int, slice, TupleIndex]
 
 
@@ -92,8 +87,8 @@ def _tidy(v: Any) -> Any:
 
 
 class Dataset(ABC, Sized):
-    arguments: Dict[str, Any] = {}
-    _name: Union[str, None] = None
+    arguments: dict[str, Any] = {}
+    _name: str | None = None
 
     def mutate(self) -> "Dataset":
         """Give an opportunity to a subclass to return a new Dataset object of a different class, if needed.
@@ -148,7 +143,7 @@ class Dataset(ABC, Sized):
         return result
 
     @property
-    def name(self) -> Union[str, None]:
+    def name(self) -> str | None:
         """Return the name of the dataset."""
         return self._name
 
@@ -298,6 +293,12 @@ class Dataset(ABC, Sized):
         if skip_missing_dates:
             return SkipMissingDates(self, expected_access)._subset(**kwargs).mutate()
 
+        if "rolling_average" in kwargs:
+            from .rolling_average import RollingAverage
+
+            rolling_average = kwargs.pop("rolling_average")
+            return RollingAverage(self, rolling_average)._subset(**kwargs).mutate()
+
         if "interpolate_frequency" in kwargs:
             from .interpolate import InterpolateFrequency
 
@@ -360,9 +361,9 @@ class Dataset(ABC, Sized):
 
     def _dates_to_indices(
         self,
-        start: Union[None, str, datetime.datetime],
-        end: Union[None, str, datetime.datetime],
-    ) -> List[int]:
+        start: None | str | datetime.datetime,
+        end: None | str | datetime.datetime,
+    ) -> list[int]:
         """Convert date range to a list of indices.
 
         Parameters
@@ -387,7 +388,7 @@ class Dataset(ABC, Sized):
 
         return [i for i, date in enumerate(self.dates) if start <= date <= end]
 
-    def _select_to_columns(self, vars: Union[str, List[str], Tuple[str], set]) -> List[int]:
+    def _select_to_columns(self, vars: str | list[str] | tuple[str] | set) -> list[int]:
         """Convert variable names to a list of column indices.
 
         Parameters
@@ -411,7 +412,7 @@ class Dataset(ABC, Sized):
 
         return [self.name_to_index[v] for v in vars]
 
-    def _drop_to_columns(self, vars: Union[str, Sequence[str]]) -> List[int]:
+    def _drop_to_columns(self, vars: str | Sequence[str]) -> list[int]:
         """Convert variable names to a list of column indices to drop.
 
         Parameters
@@ -432,7 +433,7 @@ class Dataset(ABC, Sized):
 
         return sorted([v for k, v in self.name_to_index.items() if k not in vars])
 
-    def _reorder_to_columns(self, vars: Union[str, List[str], Tuple[str], Dict[str, int]]) -> List[int]:
+    def _reorder_to_columns(self, vars: str | list[str] | tuple[str] | dict[str, int]) -> list[int]:
         """Convert variable names to a list of reordered column indices.
 
         Parameters
@@ -465,8 +466,8 @@ class Dataset(ABC, Sized):
         return indices
 
     def dates_interval_to_indices(
-        self, start: Union[None, str, datetime.datetime], end: Union[None, str, datetime.datetime]
-    ) -> List[int]:
+        self, start: None | str | datetime.datetime, end: None | str | datetime.datetime
+    ) -> list[int]:
         """Convert date interval to a list of indices.
 
         Parameters
@@ -483,7 +484,7 @@ class Dataset(ABC, Sized):
         """
         return self._dates_to_indices(start, end)
 
-    def provenance(self) -> Dict[str, Any]:
+    def provenance(self) -> dict[str, Any]:
         """Return the provenance information of the dataset.
 
         Returns
@@ -511,7 +512,7 @@ class Dataset(ABC, Sized):
         return tuple(shape)
 
     @property
-    def typed_variables(self) -> Dict[str, Any]:
+    def typed_variables(self) -> dict[str, Any]:
         """Return the variables with their types."""
         from anemoi.transform.variables import Variable
 
@@ -532,7 +533,7 @@ class Dataset(ABC, Sized):
 
         return result
 
-    def _input_sources(self) -> List[Any]:
+    def _input_sources(self) -> list[Any]:
         """Return the input sources of the dataset.
 
         Returns
@@ -544,7 +545,7 @@ class Dataset(ABC, Sized):
         self.collect_input_sources(sources)
         return sources
 
-    def metadata(self) -> Dict[str, Any]:
+    def metadata(self) -> dict[str, Any]:
         """Return the metadata of the dataset.
 
         Returns
@@ -588,7 +589,7 @@ class Dataset(ABC, Sized):
         """Return the end date of the dataset."""
         return self.dates[-1]
 
-    def dataset_metadata(self) -> Dict[str, Any]:
+    def dataset_metadata(self) -> dict[str, Any]:
         """Return the metadata of the dataset.
 
         Returns
@@ -608,7 +609,7 @@ class Dataset(ABC, Sized):
             name=self.name,
         )
 
-    def _supporting_arrays(self, *path: str) -> Dict[str, NDArray[Any]]:
+    def _supporting_arrays(self, *path: str) -> dict[str, NDArray[Any]]:
         """Return the supporting arrays of the dataset.
 
         Parameters
@@ -646,7 +647,7 @@ class Dataset(ABC, Sized):
 
         return result
 
-    def supporting_arrays(self) -> Dict[str, NDArray[Any]]:
+    def supporting_arrays(self) -> dict[str, NDArray[Any]]:
         """Return the supporting arrays to be saved in the checkpoints.
 
         Returns
@@ -657,7 +658,7 @@ class Dataset(ABC, Sized):
         arrays, _ = self._supporting_arrays_and_sources()
         return arrays
 
-    def _supporting_arrays_and_sources(self) -> Tuple[Dict[str, NDArray], Dict[int, List[str]]]:
+    def _supporting_arrays_and_sources(self) -> tuple[dict[str, NDArray], dict[int, list[str]]]:
         """Return the supporting arrays and their sources.
 
         Returns
@@ -684,7 +685,7 @@ class Dataset(ABC, Sized):
 
         return result, source_to_arrays
 
-    def collect_supporting_arrays(self, collected: List[Tuple[Tuple[str, ...], str, NDArray[Any]]], *path: str) -> None:
+    def collect_supporting_arrays(self, collected: list[tuple[tuple[str, ...], str, NDArray[Any]]], *path: str) -> None:
         """Collect supporting arrays.
 
         Parameters
@@ -697,7 +698,7 @@ class Dataset(ABC, Sized):
         # Override this method to add more arrays
         pass
 
-    def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
+    def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
         """Return specific metadata of the dataset.
 
         Parameters
@@ -764,7 +765,7 @@ class Dataset(ABC, Sized):
         """Return the label of the dataset."""
         return self.__class__.__name__.lower()
 
-    def computed_constant_fields(self) -> List[str]:
+    def computed_constant_fields(self) -> list[str]:
         """Return the computed constant fields of the dataset.
 
         Returns
@@ -781,7 +782,7 @@ class Dataset(ABC, Sized):
 
         return sorted(self._compute_constant_fields_from_a_few_samples())
 
-    def _compute_constant_fields_from_a_few_samples(self) -> List[str]:
+    def _compute_constant_fields_from_a_few_samples(self) -> list[str]:
         """Compute constant fields from a few samples.
 
         Returns
@@ -822,7 +823,7 @@ class Dataset(ABC, Sized):
 
         return [v for i, v in enumerate(self.variables) if constants[i]]
 
-    def _compute_constant_fields_from_statistics(self) -> List[str]:
+    def _compute_constant_fields_from_statistics(self) -> list[str]:
         """Compute constant fields from statistics.
 
         Returns
@@ -842,8 +843,8 @@ class Dataset(ABC, Sized):
 
     def plot(
         self,
-        date: Union[int, datetime.datetime, np.datetime64, str],
-        variable: Union[int, str],
+        date: int | datetime.datetime | np.datetime64 | str,
+        variable: int | str,
         member: int = 0,
         **kwargs: Any,
     ) -> "matplotlib.pyplot.Axes":
@@ -873,10 +874,10 @@ class Dataset(ABC, Sized):
 
     def to_index(
         self,
-        date: Union[int, datetime.datetime, np.datetime64, str],
-        variable: Union[int, str],
+        date: int | datetime.datetime | np.datetime64 | str,
+        variable: int | str,
         member: int = 0,
-    ) -> Tuple[int, int, int]:
+    ) -> tuple[int, int, int]:
         """Convert date, variable, and member to indices.
 
         Parameters
@@ -945,7 +946,7 @@ class Dataset(ABC, Sized):
 
     @property
     @abstractmethod
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
         """Return the list of variables in the dataset."""
         pass
 
@@ -969,7 +970,7 @@ class Dataset(ABC, Sized):
 
     @property
     @abstractmethod
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
         """Return the mapping of variable names to indices."""
         pass
 
@@ -1005,30 +1006,30 @@ class Dataset(ABC, Sized):
 
     @property
     @abstractmethod
-    def variables_metadata(self) -> Dict[str, Any]:
+    def variables_metadata(self) -> dict[str, Any]:
         """Return the metadata of the variables in the dataset."""
         pass
 
     @abstractmethod
     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Return the set of missing indices in the dataset."""
         pass
 
     @abstractmethod
     @cached_property
-    def constant_fields(self) -> List[str]:
+    def constant_fields(self) -> list[str]:
         """Return the list of constant fields in the dataset."""
         pass
 
     @abstractmethod
     @cached_property
-    def statistics(self) -> Dict[str, NDArray[Any]]:
+    def statistics(self) -> dict[str, NDArray[Any]]:
         """Return the statistics of the dataset."""
         pass
 
     @abstractmethod
-    def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         """Return the tendencies of the statistics in the dataset.
 
         Parameters
@@ -1071,7 +1072,7 @@ class Dataset(ABC, Sized):
         pass
 
     @abstractmethod
-    def collect_input_sources(self, sources: List[Any]) -> None:
+    def collect_input_sources(self, sources: list[Any]) -> None:
         """Collect the input sources of the dataset.
 
         Parameters
@@ -1082,7 +1083,7 @@ class Dataset(ABC, Sized):
         pass
 
     @abstractmethod
-    def get_dataset_names(self, names: Set[str]) -> None:
+    def get_dataset_names(self, names: set[str]) -> None:
         """Get the names of the datasets.
 
         Parameters