anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/validate.py +59 -0
  8. anemoi/datasets/compute/recentre.py +3 -6
  9. anemoi/datasets/create/__init__.py +22 -25
  10. anemoi/datasets/create/check.py +10 -12
  11. anemoi/datasets/create/chunks.py +1 -2
  12. anemoi/datasets/create/config.py +3 -6
  13. anemoi/datasets/create/filter.py +1 -2
  14. anemoi/datasets/create/input/__init__.py +1 -2
  15. anemoi/datasets/create/input/action.py +3 -5
  16. anemoi/datasets/create/input/concat.py +5 -8
  17. anemoi/datasets/create/input/context.py +3 -6
  18. anemoi/datasets/create/input/data_sources.py +5 -8
  19. anemoi/datasets/create/input/empty.py +1 -2
  20. anemoi/datasets/create/input/filter.py +2 -3
  21. anemoi/datasets/create/input/function.py +1 -2
  22. anemoi/datasets/create/input/join.py +4 -5
  23. anemoi/datasets/create/input/misc.py +4 -6
  24. anemoi/datasets/create/input/repeated_dates.py +13 -18
  25. anemoi/datasets/create/input/result.py +29 -33
  26. anemoi/datasets/create/input/step.py +4 -8
  27. anemoi/datasets/create/input/template.py +3 -4
  28. anemoi/datasets/create/input/trace.py +1 -1
  29. anemoi/datasets/create/patch.py +1 -2
  30. anemoi/datasets/create/persistent.py +3 -5
  31. anemoi/datasets/create/size.py +1 -3
  32. anemoi/datasets/create/sources/accumulations.py +47 -52
  33. anemoi/datasets/create/sources/accumulations2.py +4 -8
  34. anemoi/datasets/create/sources/constants.py +1 -3
  35. anemoi/datasets/create/sources/empty.py +1 -2
  36. anemoi/datasets/create/sources/fdb.py +133 -0
  37. anemoi/datasets/create/sources/forcings.py +1 -2
  38. anemoi/datasets/create/sources/grib.py +6 -10
  39. anemoi/datasets/create/sources/grib_index.py +13 -15
  40. anemoi/datasets/create/sources/hindcasts.py +2 -5
  41. anemoi/datasets/create/sources/legacy.py +1 -1
  42. anemoi/datasets/create/sources/mars.py +17 -21
  43. anemoi/datasets/create/sources/netcdf.py +1 -2
  44. anemoi/datasets/create/sources/opendap.py +1 -3
  45. anemoi/datasets/create/sources/patterns.py +4 -6
  46. anemoi/datasets/create/sources/recentre.py +8 -11
  47. anemoi/datasets/create/sources/source.py +3 -6
  48. anemoi/datasets/create/sources/tendencies.py +2 -5
  49. anemoi/datasets/create/sources/xarray.py +4 -6
  50. anemoi/datasets/create/sources/xarray_support/__init__.py +12 -13
  51. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  52. anemoi/datasets/create/sources/xarray_support/field.py +16 -12
  53. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  54. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  55. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  56. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  57. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  58. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  59. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  60. anemoi/datasets/create/sources/xarray_zarr.py +1 -2
  61. anemoi/datasets/create/sources/zenodo.py +3 -5
  62. anemoi/datasets/create/statistics/__init__.py +3 -6
  63. anemoi/datasets/create/testing.py +4 -0
  64. anemoi/datasets/create/typing.py +1 -2
  65. anemoi/datasets/create/utils.py +1 -2
  66. anemoi/datasets/create/zarr.py +7 -2
  67. anemoi/datasets/data/__init__.py +15 -6
  68. anemoi/datasets/data/complement.py +7 -12
  69. anemoi/datasets/data/concat.py +5 -8
  70. anemoi/datasets/data/dataset.py +42 -47
  71. anemoi/datasets/data/debug.py +7 -9
  72. anemoi/datasets/data/ensemble.py +4 -6
  73. anemoi/datasets/data/fill_missing.py +7 -10
  74. anemoi/datasets/data/forwards.py +22 -26
  75. anemoi/datasets/data/grids.py +12 -16
  76. anemoi/datasets/data/indexing.py +9 -12
  77. anemoi/datasets/data/interpolate.py +7 -15
  78. anemoi/datasets/data/join.py +8 -12
  79. anemoi/datasets/data/masked.py +6 -11
  80. anemoi/datasets/data/merge.py +5 -9
  81. anemoi/datasets/data/misc.py +41 -45
  82. anemoi/datasets/data/missing.py +11 -16
  83. anemoi/datasets/data/observations/__init__.py +8 -14
  84. anemoi/datasets/data/padded.py +3 -5
  85. anemoi/datasets/data/records/backends/__init__.py +2 -2
  86. anemoi/datasets/data/rescale.py +5 -12
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +16 -21
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +13 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/grids.py +5 -9
  96. anemoi/datasets/testing.py +93 -7
  97. anemoi/datasets/validate.py +598 -0
  98. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +4 -4
  99. anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
  100. anemoi/datasets/utils/__init__.py +0 -8
  101. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  102. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
  103. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
  104. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
  105. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
@@ -11,7 +11,6 @@ import datetime
11
11
  import logging
12
12
  import shutil
13
13
  from typing import Any
14
- from typing import Optional
15
14
 
16
15
  import numpy as np
17
16
  import zarr
@@ -120,7 +119,7 @@ class ZarrBuiltRegistry:
120
119
  flags = None
121
120
  z = None
122
121
 
123
- def __init__(self, path: str, synchronizer_path: Optional[str] = None, use_threads: bool = False):
122
+ def __init__(self, path: str, synchronizer_path: str | None = None, use_threads: bool = False):
124
123
  """Initialize the ZarrBuiltRegistry.
125
124
 
126
125
  Parameters
@@ -154,6 +153,12 @@ class ZarrBuiltRegistry:
154
153
  except FileNotFoundError:
155
154
  pass
156
155
 
156
+ _build = self.zarr_path + "/_build"
157
+ try:
158
+ shutil.rmtree(_build)
159
+ except FileNotFoundError:
160
+ pass
161
+
157
162
  def _open_write(self) -> zarr.Group:
158
163
  """Open the Zarr store in write mode."""
159
164
  import zarr
@@ -8,9 +8,9 @@
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
10
  import logging
11
+ import os
11
12
  from typing import TYPE_CHECKING
12
13
  from typing import Any
13
- from typing import Set
14
14
 
15
15
  # from .dataset import FullIndex
16
16
  # from .dataset import Shape
@@ -82,6 +82,9 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
82
82
  Dataset
83
83
  The opened dataset.
84
84
  """
85
+
86
+ trace = int(os.environ.get("ANEMOI_DATASETS_TRACE", 0))
87
+
85
88
  # That will get rid of OmegaConf objects
86
89
 
87
90
  args, kwargs = _convert(args), _convert(kwargs)
@@ -90,22 +93,28 @@ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
90
93
  ds = ds.mutate()
91
94
  ds.arguments = {"args": args, "kwargs": kwargs}
92
95
  ds._check()
96
+
97
+ if trace:
98
+ from anemoi.datasets.testing import Trace
99
+
100
+ ds = Trace(ds)
101
+
93
102
  return ds
94
103
 
95
104
 
96
- def save_dataset(recipe: dict, zarr_path: str, n_workers: int = 1) -> None:
105
+ def save_dataset(dataset: "Dataset", zarr_path: str, n_workers: int = 1) -> None:
97
106
  """Open a dataset and save it to disk.
98
107
 
99
108
  Parameters
100
109
  ----------
101
- recipe : dict
102
- Recipe used with open_dataset (not a dataset creation recipe).
110
+ dataset : Dataset
111
+ anemoi-dataset opened from python to save to Zarr store
103
112
  zarr_path : str
104
113
  Path to store the obtained anemoi dataset to disk.
105
114
  n_workers : int
106
115
  Number of workers to use for parallel processing. If none, sequential processing will be performed.
107
116
  """
108
- _save_dataset(recipe, zarr_path, n_workers)
117
+ _save_dataset(dataset, zarr_path, n_workers)
109
118
 
110
119
 
111
120
  def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
@@ -124,6 +133,6 @@ def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
124
133
  The list of dataset names.
125
134
  """
126
135
  ds = _open_dataset(*args, **kwargs)
127
- names: Set[str] = set()
136
+ names: set[str] = set()
128
137
  ds.get_dataset_names(names)
129
138
  return sorted(names)
@@ -12,11 +12,6 @@ import logging
12
12
  from abc import abstractmethod
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import List
17
- from typing import Optional
18
- from typing import Set
19
- from typing import Tuple
20
15
 
21
16
  import numpy as np
22
17
  from numpy.typing import NDArray
@@ -91,26 +86,26 @@ class Complement(Combined):
91
86
  raise ValueError("Augment: no missing variables")
92
87
 
93
88
  @property
94
- def variables(self) -> List[str]:
89
+ def variables(self) -> list[str]:
95
90
  """Returns the list of variables to be added to the target dataset."""
96
91
  return self._variables
97
92
 
98
93
  @property
99
- def statistics(self) -> Dict[str, NDArray[Any]]:
94
+ def statistics(self) -> dict[str, NDArray[Any]]:
100
95
  datasets = [self._source, self._target]
101
96
  return {
102
97
  k: [d.statistics[k][d.name_to_index[i]] for d in datasets for i in d.variables if i in self.variables]
103
98
  for k in datasets[0].statistics
104
99
  }
105
100
 
106
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
101
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
107
102
  index = [self._source.name_to_index[v] for v in self._variables]
108
103
  if delta is None:
109
104
  delta = self.frequency
110
105
  return {k: v[index] for k, v in self._source.statistics_tendencies(delta).items()}
111
106
 
112
107
  @property
113
- def name_to_index(self) -> Dict[str, int]:
108
+ def name_to_index(self) -> dict[str, int]:
114
109
  """Returns a dictionary mapping variable names to their indices."""
115
110
  return {v: i for i, v in enumerate(self.variables)}
116
111
 
@@ -121,7 +116,7 @@ class Complement(Combined):
121
116
  return (shape[0], len(self._variables)) + shape[2:]
122
117
 
123
118
  @property
124
- def variables_metadata(self) -> Dict[str, Any]:
119
+ def variables_metadata(self) -> dict[str, Any]:
125
120
  """Returns the metadata of the variables to be added to the target dataset."""
126
121
  # Merge the two dicts first
127
122
  all_meta = {**self._source.variables_metadata, **self._target.variables_metadata}
@@ -142,7 +137,7 @@ class Complement(Combined):
142
137
  pass
143
138
 
144
139
  @cached_property
145
- def missing(self) -> Set[int]:
140
+ def missing(self) -> set[int]:
146
141
  """Returns the set of missing indices in the source and target datasets."""
147
142
  missing = self._source.missing.copy()
148
143
  missing = missing | self._target.missing
@@ -317,7 +312,7 @@ class ComplementNearest(Complement):
317
312
  return apply_index_to_slices_changes(result, changes)
318
313
 
319
314
 
320
- def complement_factory(args: Tuple, kwargs: dict) -> Dataset:
315
+ def complement_factory(args: tuple, kwargs: dict) -> Dataset:
321
316
  """Factory function to create a Complement instance based on the provided arguments.
322
317
 
323
318
  Parameters
@@ -11,9 +11,6 @@
11
11
  import logging
12
12
  from functools import cached_property
13
13
  from typing import Any
14
- from typing import List
15
- from typing import Set
16
- from typing import Tuple
17
14
 
18
15
  import numpy as np
19
16
  from anemoi.utils.dates import frequency_to_timedelta
@@ -123,12 +120,12 @@ class ConcatMixin:
123
120
  return np.concatenate(result)
124
121
 
125
122
  @cached_property
126
- def missing(self) -> Set[int]:
123
+ def missing(self) -> set[int]:
127
124
  """Returns the set of missing indices in the concatenated datasets."""
128
- result: Set[int] = set()
125
+ result: set[int] = set()
129
126
  offset = 0
130
127
  for d in self.datasets:
131
- result = result | set(m + offset for m in d.missing)
128
+ result = result | {m + offset for m in d.missing}
132
129
  offset += len(d)
133
130
  return result
134
131
 
@@ -195,7 +192,7 @@ class Concat(ConcatMixin, Combined):
195
192
  return Node(self, [d.tree() for d in self.datasets])
196
193
 
197
194
  @classmethod
198
- def check_dataset_compatibility(cls, datasets: List[Any], fill_missing_gaps: bool = False) -> List[Any]:
195
+ def check_dataset_compatibility(cls, datasets: list[Any], fill_missing_gaps: bool = False) -> list[Any]:
199
196
  """Checks the compatibility of the datasets for concatenation and fills missing gaps if required.
200
197
 
201
198
  Parameters
@@ -259,7 +256,7 @@ class Concat(ConcatMixin, Combined):
259
256
  return {}
260
257
 
261
258
 
262
- def concat_factory(args: Tuple[Any, ...], kwargs: dict) -> Concat:
259
+ def concat_factory(args: tuple[Any, ...], kwargs: dict) -> Concat:
263
260
  """Factory function to create a Concat object.
264
261
 
265
262
  Parameters
@@ -22,15 +22,10 @@ try:
22
22
  except ImportError:
23
23
  # Python 3.9
24
24
  EllipsisType = type(Ellipsis)
25
+ from collections.abc import Sequence
26
+ from collections.abc import Sized
25
27
  from typing import TYPE_CHECKING
26
28
  from typing import Any
27
- from typing import Dict
28
- from typing import List
29
- from typing import Optional
30
- from typing import Sequence
31
- from typing import Set
32
- from typing import Sized
33
- from typing import Tuple
34
29
  from typing import Union
35
30
 
36
31
  import numpy as np
@@ -48,8 +43,8 @@ if TYPE_CHECKING:
48
43
  LOG = logging.getLogger(__name__)
49
44
 
50
45
 
51
- Shape = Tuple[int, ...]
52
- TupleIndex = Tuple[Union[int, slice, EllipsisType], ...]
46
+ Shape = tuple[int, ...]
47
+ TupleIndex = tuple[Union[int, slice, EllipsisType], ...]
53
48
  FullIndex = Union[int, slice, TupleIndex]
54
49
 
55
50
 
@@ -92,8 +87,8 @@ def _tidy(v: Any) -> Any:
92
87
 
93
88
 
94
89
  class Dataset(ABC, Sized):
95
- arguments: Dict[str, Any] = {}
96
- _name: Union[str, None] = None
90
+ arguments: dict[str, Any] = {}
91
+ _name: str | None = None
97
92
 
98
93
  def mutate(self) -> "Dataset":
99
94
  """Give an opportunity to a subclass to return a new Dataset object of a different class, if needed.
@@ -148,7 +143,7 @@ class Dataset(ABC, Sized):
148
143
  return result
149
144
 
150
145
  @property
151
- def name(self) -> Union[str, None]:
146
+ def name(self) -> str | None:
152
147
  """Return the name of the dataset."""
153
148
  return self._name
154
149
 
@@ -360,9 +355,9 @@ class Dataset(ABC, Sized):
360
355
 
361
356
  def _dates_to_indices(
362
357
  self,
363
- start: Union[None, str, datetime.datetime],
364
- end: Union[None, str, datetime.datetime],
365
- ) -> List[int]:
358
+ start: None | str | datetime.datetime,
359
+ end: None | str | datetime.datetime,
360
+ ) -> list[int]:
366
361
  """Convert date range to a list of indices.
367
362
 
368
363
  Parameters
@@ -387,7 +382,7 @@ class Dataset(ABC, Sized):
387
382
 
388
383
  return [i for i, date in enumerate(self.dates) if start <= date <= end]
389
384
 
390
- def _select_to_columns(self, vars: Union[str, List[str], Tuple[str], set]) -> List[int]:
385
+ def _select_to_columns(self, vars: str | list[str] | tuple[str] | set) -> list[int]:
391
386
  """Convert variable names to a list of column indices.
392
387
 
393
388
  Parameters
@@ -411,7 +406,7 @@ class Dataset(ABC, Sized):
411
406
 
412
407
  return [self.name_to_index[v] for v in vars]
413
408
 
414
- def _drop_to_columns(self, vars: Union[str, Sequence[str]]) -> List[int]:
409
+ def _drop_to_columns(self, vars: str | Sequence[str]) -> list[int]:
415
410
  """Convert variable names to a list of column indices to drop.
416
411
 
417
412
  Parameters
@@ -432,7 +427,7 @@ class Dataset(ABC, Sized):
432
427
 
433
428
  return sorted([v for k, v in self.name_to_index.items() if k not in vars])
434
429
 
435
- def _reorder_to_columns(self, vars: Union[str, List[str], Tuple[str], Dict[str, int]]) -> List[int]:
430
+ def _reorder_to_columns(self, vars: str | list[str] | tuple[str] | dict[str, int]) -> list[int]:
436
431
  """Convert variable names to a list of reordered column indices.
437
432
 
438
433
  Parameters
@@ -465,8 +460,8 @@ class Dataset(ABC, Sized):
465
460
  return indices
466
461
 
467
462
  def dates_interval_to_indices(
468
- self, start: Union[None, str, datetime.datetime], end: Union[None, str, datetime.datetime]
469
- ) -> List[int]:
463
+ self, start: None | str | datetime.datetime, end: None | str | datetime.datetime
464
+ ) -> list[int]:
470
465
  """Convert date interval to a list of indices.
471
466
 
472
467
  Parameters
@@ -483,7 +478,7 @@ class Dataset(ABC, Sized):
483
478
  """
484
479
  return self._dates_to_indices(start, end)
485
480
 
486
- def provenance(self) -> Dict[str, Any]:
481
+ def provenance(self) -> dict[str, Any]:
487
482
  """Return the provenance information of the dataset.
488
483
 
489
484
  Returns
@@ -511,7 +506,7 @@ class Dataset(ABC, Sized):
511
506
  return tuple(shape)
512
507
 
513
508
  @property
514
- def typed_variables(self) -> Dict[str, Any]:
509
+ def typed_variables(self) -> dict[str, Any]:
515
510
  """Return the variables with their types."""
516
511
  from anemoi.transform.variables import Variable
517
512
 
@@ -532,7 +527,7 @@ class Dataset(ABC, Sized):
532
527
 
533
528
  return result
534
529
 
535
- def _input_sources(self) -> List[Any]:
530
+ def _input_sources(self) -> list[Any]:
536
531
  """Return the input sources of the dataset.
537
532
 
538
533
  Returns
@@ -544,7 +539,7 @@ class Dataset(ABC, Sized):
544
539
  self.collect_input_sources(sources)
545
540
  return sources
546
541
 
547
- def metadata(self) -> Dict[str, Any]:
542
+ def metadata(self) -> dict[str, Any]:
548
543
  """Return the metadata of the dataset.
549
544
 
550
545
  Returns
@@ -588,7 +583,7 @@ class Dataset(ABC, Sized):
588
583
  """Return the end date of the dataset."""
589
584
  return self.dates[-1]
590
585
 
591
- def dataset_metadata(self) -> Dict[str, Any]:
586
+ def dataset_metadata(self) -> dict[str, Any]:
592
587
  """Return the metadata of the dataset.
593
588
 
594
589
  Returns
@@ -608,7 +603,7 @@ class Dataset(ABC, Sized):
608
603
  name=self.name,
609
604
  )
610
605
 
611
- def _supporting_arrays(self, *path: str) -> Dict[str, NDArray[Any]]:
606
+ def _supporting_arrays(self, *path: str) -> dict[str, NDArray[Any]]:
612
607
  """Return the supporting arrays of the dataset.
613
608
 
614
609
  Parameters
@@ -646,7 +641,7 @@ class Dataset(ABC, Sized):
646
641
 
647
642
  return result
648
643
 
649
- def supporting_arrays(self) -> Dict[str, NDArray[Any]]:
644
+ def supporting_arrays(self) -> dict[str, NDArray[Any]]:
650
645
  """Return the supporting arrays to be saved in the checkpoints.
651
646
 
652
647
  Returns
@@ -657,7 +652,7 @@ class Dataset(ABC, Sized):
657
652
  arrays, _ = self._supporting_arrays_and_sources()
658
653
  return arrays
659
654
 
660
- def _supporting_arrays_and_sources(self) -> Tuple[Dict[str, NDArray], Dict[int, List[str]]]:
655
+ def _supporting_arrays_and_sources(self) -> tuple[dict[str, NDArray], dict[int, list[str]]]:
661
656
  """Return the supporting arrays and their sources.
662
657
 
663
658
  Returns
@@ -684,7 +679,7 @@ class Dataset(ABC, Sized):
684
679
 
685
680
  return result, source_to_arrays
686
681
 
687
- def collect_supporting_arrays(self, collected: List[Tuple[Tuple[str, ...], str, NDArray[Any]]], *path: str) -> None:
682
+ def collect_supporting_arrays(self, collected: list[tuple[tuple[str, ...], str, NDArray[Any]]], *path: str) -> None:
688
683
  """Collect supporting arrays.
689
684
 
690
685
  Parameters
@@ -697,7 +692,7 @@ class Dataset(ABC, Sized):
697
692
  # Override this method to add more arrays
698
693
  pass
699
694
 
700
- def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
695
+ def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
701
696
  """Return specific metadata of the dataset.
702
697
 
703
698
  Parameters
@@ -764,7 +759,7 @@ class Dataset(ABC, Sized):
764
759
  """Return the label of the dataset."""
765
760
  return self.__class__.__name__.lower()
766
761
 
767
- def computed_constant_fields(self) -> List[str]:
762
+ def computed_constant_fields(self) -> list[str]:
768
763
  """Return the computed constant fields of the dataset.
769
764
 
770
765
  Returns
@@ -781,7 +776,7 @@ class Dataset(ABC, Sized):
781
776
 
782
777
  return sorted(self._compute_constant_fields_from_a_few_samples())
783
778
 
784
- def _compute_constant_fields_from_a_few_samples(self) -> List[str]:
779
+ def _compute_constant_fields_from_a_few_samples(self) -> list[str]:
785
780
  """Compute constant fields from a few samples.
786
781
 
787
782
  Returns
@@ -822,7 +817,7 @@ class Dataset(ABC, Sized):
822
817
 
823
818
  return [v for i, v in enumerate(self.variables) if constants[i]]
824
819
 
825
- def _compute_constant_fields_from_statistics(self) -> List[str]:
820
+ def _compute_constant_fields_from_statistics(self) -> list[str]:
826
821
  """Compute constant fields from statistics.
827
822
 
828
823
  Returns
@@ -842,8 +837,8 @@ class Dataset(ABC, Sized):
842
837
 
843
838
  def plot(
844
839
  self,
845
- date: Union[int, datetime.datetime, np.datetime64, str],
846
- variable: Union[int, str],
840
+ date: int | datetime.datetime | np.datetime64 | str,
841
+ variable: int | str,
847
842
  member: int = 0,
848
843
  **kwargs: Any,
849
844
  ) -> "matplotlib.pyplot.Axes":
@@ -873,10 +868,10 @@ class Dataset(ABC, Sized):
873
868
 
874
869
  def to_index(
875
870
  self,
876
- date: Union[int, datetime.datetime, np.datetime64, str],
877
- variable: Union[int, str],
871
+ date: int | datetime.datetime | np.datetime64 | str,
872
+ variable: int | str,
878
873
  member: int = 0,
879
- ) -> Tuple[int, int, int]:
874
+ ) -> tuple[int, int, int]:
880
875
  """Convert date, variable, and member to indices.
881
876
 
882
877
  Parameters
@@ -945,7 +940,7 @@ class Dataset(ABC, Sized):
945
940
 
946
941
  @property
947
942
  @abstractmethod
948
- def variables(self) -> List[str]:
943
+ def variables(self) -> list[str]:
949
944
  """Return the list of variables in the dataset."""
950
945
  pass
951
946
 
@@ -969,7 +964,7 @@ class Dataset(ABC, Sized):
969
964
 
970
965
  @property
971
966
  @abstractmethod
972
- def name_to_index(self) -> Dict[str, int]:
967
+ def name_to_index(self) -> dict[str, int]:
973
968
  """Return the mapping of variable names to indices."""
974
969
  pass
975
970
 
@@ -1005,30 +1000,30 @@ class Dataset(ABC, Sized):
1005
1000
 
1006
1001
  @property
1007
1002
  @abstractmethod
1008
- def variables_metadata(self) -> Dict[str, Any]:
1003
+ def variables_metadata(self) -> dict[str, Any]:
1009
1004
  """Return the metadata of the variables in the dataset."""
1010
1005
  pass
1011
1006
 
1012
1007
  @abstractmethod
1013
1008
  @cached_property
1014
- def missing(self) -> Set[int]:
1009
+ def missing(self) -> set[int]:
1015
1010
  """Return the set of missing indices in the dataset."""
1016
1011
  pass
1017
1012
 
1018
1013
  @abstractmethod
1019
1014
  @cached_property
1020
- def constant_fields(self) -> List[str]:
1015
+ def constant_fields(self) -> list[str]:
1021
1016
  """Return the list of constant fields in the dataset."""
1022
1017
  pass
1023
1018
 
1024
1019
  @abstractmethod
1025
1020
  @cached_property
1026
- def statistics(self) -> Dict[str, NDArray[Any]]:
1021
+ def statistics(self) -> dict[str, NDArray[Any]]:
1027
1022
  """Return the statistics of the dataset."""
1028
1023
  pass
1029
1024
 
1030
1025
  @abstractmethod
1031
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
1026
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
1032
1027
  """Return the tendencies of the statistics in the dataset.
1033
1028
 
1034
1029
  Parameters
@@ -1071,7 +1066,7 @@ class Dataset(ABC, Sized):
1071
1066
  pass
1072
1067
 
1073
1068
  @abstractmethod
1074
- def collect_input_sources(self, sources: List[Any]) -> None:
1069
+ def collect_input_sources(self, sources: list[Any]) -> None:
1075
1070
  """Collect the input sources of the dataset.
1076
1071
 
1077
1072
  Parameters
@@ -1082,7 +1077,7 @@ class Dataset(ABC, Sized):
1082
1077
  pass
1083
1078
 
1084
1079
  @abstractmethod
1085
- def get_dataset_names(self, names: Set[str]) -> None:
1080
+ def get_dataset_names(self, names: set[str]) -> None:
1086
1081
  """Get the names of the datasets.
1087
1082
 
1088
1083
  Parameters
@@ -11,12 +11,10 @@
11
11
  import logging
12
12
  import os
13
13
  import textwrap
14
+ from collections.abc import Callable
14
15
  from functools import wraps
15
16
  from typing import TYPE_CHECKING
16
17
  from typing import Any
17
- from typing import Callable
18
- from typing import List
19
- from typing import Optional
20
18
 
21
19
  from anemoi.utils.text import Tree
22
20
  from numpy.typing import NDArray
@@ -56,7 +54,7 @@ def css(name: str) -> str:
56
54
  class Node:
57
55
  """A class to represent a node in a dataset tree."""
58
56
 
59
- def __init__(self, dataset: "Dataset", kids: List[Any], **kwargs: Any) -> None:
57
+ def __init__(self, dataset: "Dataset", kids: list[Any], **kwargs: Any) -> None:
60
58
  """Initializes a Node object.
61
59
 
62
60
  Parameters
@@ -72,7 +70,7 @@ class Node:
72
70
  self.kids = kids
73
71
  self.kwargs = kwargs
74
72
 
75
- def _put(self, indent: int, result: List[str]) -> None:
73
+ def _put(self, indent: int, result: list[str]) -> None:
76
74
  """Helper method to add the node representation to the result list.
77
75
 
78
76
  Parameters
@@ -103,11 +101,11 @@ class Node:
103
101
  str
104
102
  String representation of the node.
105
103
  """
106
- result: List[str] = []
104
+ result: list[str] = []
107
105
  self._put(0, result)
108
106
  return "\n".join(result)
109
107
 
110
- def graph(self, digraph: List[str], nodes: dict) -> None:
108
+ def graph(self, digraph: list[str], nodes: dict) -> None:
111
109
  """Generates a graph representation of the node.
112
110
 
113
111
  Parameters
@@ -170,7 +168,7 @@ class Node:
170
168
  digraph.append("}")
171
169
  return "\n".join(digraph)
172
170
 
173
- def _html(self, indent: str, rows: List[List[str]]) -> None:
171
+ def _html(self, indent: str, rows: list[list[str]]) -> None:
174
172
  """Helper method to add the node representation to the HTML rows.
175
173
 
176
174
  Parameters
@@ -273,7 +271,7 @@ class Node:
273
271
  class Source:
274
272
  """A class used to follow the provenance of a data point."""
275
273
 
276
- def __init__(self, dataset: Any, index: int, source: Optional[Any] = None, info: Optional[Any] = None) -> None:
274
+ def __init__(self, dataset: Any, index: int, source: Any | None = None, info: Any | None = None) -> None:
277
275
  """Initializes a Source object.
278
276
 
279
277
  Parameters
@@ -10,8 +10,6 @@
10
10
 
11
11
  import logging
12
12
  from typing import Any
13
- from typing import Dict
14
- from typing import Tuple
15
13
 
16
14
  import numpy as np
17
15
  from numpy.typing import NDArray
@@ -105,7 +103,7 @@ class Number(Forwards):
105
103
  """
106
104
  return Node(self, [self.forward.tree()], numbers=[n + 1 for n in self.members])
107
105
 
108
- def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
106
+ def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
109
107
  """Returns metadata specific to the Number object.
110
108
 
111
109
  Parameters
@@ -122,7 +120,7 @@ class Number(Forwards):
122
120
  "numbers": [n + 1 for n in self.members],
123
121
  }
124
122
 
125
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
123
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
126
124
  """Returns metadata specific to the Number object."""
127
125
  return {}
128
126
 
@@ -140,7 +138,7 @@ class Ensemble(GivenAxis):
140
138
  """
141
139
  return Node(self, [d.tree() for d in self.datasets])
142
140
 
143
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
141
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
144
142
  """Get the metadata specific to the forwards subclass.
145
143
 
146
144
  Returns:
@@ -149,7 +147,7 @@ class Ensemble(GivenAxis):
149
147
  return {}
150
148
 
151
149
 
152
- def ensemble_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> Ensemble:
150
+ def ensemble_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> Ensemble:
153
151
  """Factory function to create an Ensemble object.
154
152
 
155
153
  Parameters
@@ -10,9 +10,6 @@
10
10
 
11
11
  import logging
12
12
  from typing import Any
13
- from typing import Dict
14
- from typing import Optional
15
- from typing import Set
16
13
 
17
14
  import numpy as np
18
15
  from numpy.typing import NDArray
@@ -46,7 +43,7 @@ class MissingDatesFill(Forwards):
46
43
  """
47
44
  super().__init__(dataset)
48
45
  self._missing = set(dataset.missing)
49
- self._warnings: Set[int] = set()
46
+ self._warnings: set[int] = set()
50
47
 
51
48
  @debug_indexing
52
49
  @expand_list_indexing
@@ -84,7 +81,7 @@ class MissingDatesFill(Forwards):
84
81
  return np.stack([self[i] for i in range(*s.indices(self._len))])
85
82
 
86
83
  @property
87
- def missing(self) -> Set[int]:
84
+ def missing(self) -> set[int]:
88
85
  """Get the set of missing dates."""
89
86
  return set()
90
87
 
@@ -153,7 +150,7 @@ class MissingDatesClosest(MissingDatesFill):
153
150
  self.closest = closest
154
151
  self._closest = {}
155
152
 
156
- def _fill_missing(self, n: int, a: Optional[int], b: Optional[int]) -> NDArray[Any]:
153
+ def _fill_missing(self, n: int, a: int | None, b: int | None) -> NDArray[Any]:
157
154
  """Fill the missing date at the given index.
158
155
 
159
156
  Parameters
@@ -189,7 +186,7 @@ class MissingDatesClosest(MissingDatesFill):
189
186
 
190
187
  return self.forward[self._closest[n]]
191
188
 
192
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
189
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
193
190
  """Get metadata specific to the subclass.
194
191
 
195
192
  Returns
@@ -224,7 +221,7 @@ class MissingDatesInterpolate(MissingDatesFill):
224
221
  super().__init__(dataset)
225
222
  self._alpha = {}
226
223
 
227
- def _fill_missing(self, n: int, a: Optional[int], b: Optional[int]) -> NDArray[Any]:
224
+ def _fill_missing(self, n: int, a: int | None, b: int | None) -> NDArray[Any]:
228
225
  """Fill the missing date at the given index using interpolation.
229
226
 
230
227
  Parameters
@@ -264,7 +261,7 @@ class MissingDatesInterpolate(MissingDatesFill):
264
261
  alpha = self._alpha[n]
265
262
  return self.forward[a] * (1 - alpha) + self.forward[b] * alpha
266
263
 
267
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
264
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
268
265
  """Get metadata specific to the subclass.
269
266
 
270
267
  Returns
@@ -285,7 +282,7 @@ class MissingDatesInterpolate(MissingDatesFill):
285
282
  return Node(self, [self.forward.tree()])
286
283
 
287
284
 
288
- def fill_missing_dates_factory(dataset: Any, method: str, kwargs: Dict[str, Any]) -> Dataset:
285
+ def fill_missing_dates_factory(dataset: Any, method: str, kwargs: dict[str, Any]) -> Dataset:
289
286
  """Factory function to create an instance of a class to fill missing dates.
290
287
 
291
288
  Parameters