anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -10,8 +10,6 @@ import logging
10
10
  import os
11
11
  from functools import cached_property
12
12
  from typing import Any
13
- from typing import Dict
14
- from typing import Tuple
15
13
 
16
14
  import numpy as np
17
15
  from anemoi.utils.dates import frequency_to_timedelta
@@ -82,10 +80,8 @@ class ObservationsBase(Dataset):
82
80
  # return [self.getitem(j) for j in i]
83
81
 
84
82
  raise ValueError(
85
- (
86
- f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
87
- "observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
88
- )
83
+ f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
84
+ "observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
89
85
  )
90
86
 
91
87
  @property
@@ -195,13 +191,11 @@ class ObservationsZarr(ObservationsBase):
195
191
 
196
192
  if len(self.forward) != len(self.dates):
197
193
  raise ValueError(
198
- (
199
- f"Dates are not consistent with the number of items in the dataset. "
200
- f"The dataset contains {len(self.forward)} time windows. "
201
- f"This is not compatible with the "
202
- f"{len(self.dates)} requested dates with frequency={frequency_hours}"
203
- f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
204
- )
194
+ f"Dates are not consistent with the number of items in the dataset. "
195
+ f"The dataset contains {len(self.forward)} time windows. "
196
+ f"This is not compatible with the "
197
+ f"{len(self.dates)} requested dates with frequency={frequency_hours}"
198
+ f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
205
199
  )
206
200
 
207
201
  @property
@@ -307,7 +301,7 @@ class ObservationsZarr(ObservationsBase):
307
301
  return f"Observations({os.path.basename(self.path)}, {self.dates[0]};{self.dates[-1]}, {len(self)})"
308
302
 
309
303
 
310
- def observations_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> ObservationsBase:
304
+ def observations_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> ObservationsBase:
311
305
  observations = kwargs.pop("observations")
312
306
 
313
307
  if not isinstance(observations, dict):
@@ -12,8 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import Set
17
15
 
18
16
  import numpy as np
19
17
  from anemoi.utils.dates import frequency_to_timedelta
@@ -38,7 +36,7 @@ class Padded(Forwards):
38
36
  _after: int = 0
39
37
  _inside: int = 0
40
38
 
41
- def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: Dict[str, Any]) -> None:
39
+ def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: dict[str, Any]) -> None:
42
40
  """Create a padded subset of a dataset.
43
41
 
44
42
  Attributes:
@@ -195,7 +193,7 @@ class Padded(Forwards):
195
193
  return (len(self.dates),) + self.dataset.shape[1:]
196
194
 
197
195
  @cached_property
198
- def missing(self) -> Set[int]:
196
+ def missing(self) -> set[int]:
199
197
  raise NotImplementedError("Need to decide whether to include the added dates as missing or not")
200
198
  # return self.forward.missing
201
199
 
@@ -207,7 +205,7 @@ class Padded(Forwards):
207
205
  """
208
206
  return Node(self, [self.dataset.tree()], **self.reason)
209
207
 
210
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
208
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
211
209
  """Get the metadata specific to the forwards subclass.
212
210
 
213
211
  Returns:
@@ -35,7 +35,7 @@ class Npz1Backend(Backend):
35
35
  return dict(np.load(f))
36
36
 
37
37
  def read_metadata(self):
38
- with open(os.path.join(self.path, "metadata.json"), "r") as f:
38
+ with open(os.path.join(self.path, "metadata.json")) as f:
39
39
  return json.load(f)
40
40
 
41
41
  def read_statistics(self):
@@ -56,7 +56,7 @@ class Npz2Backend(Backend):
56
56
  return dict(np.load(f))
57
57
 
58
58
  def read_metadata(self):
59
- with open(os.path.join(self.path, "metadata.json"), "r") as f:
59
+ with open(os.path.join(self.path, "metadata.json")) as f:
60
60
  return json.load(f)
61
61
 
62
62
  def read_statistics(self):
@@ -12,11 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import List
17
- from typing import Optional
18
- from typing import Tuple
19
- from typing import Union
20
15
 
21
16
  import numpy as np
22
17
  from numpy.typing import NDArray
@@ -35,9 +30,7 @@ from .indexing import update_tuple
35
30
  LOG = logging.getLogger(__name__)
36
31
 
37
32
 
38
- def make_rescale(
39
- variable: str, rescale: Union[Tuple[float, float], List[str], Dict[str, float]]
40
- ) -> Tuple[float, float]:
33
+ def make_rescale(variable: str, rescale: tuple[float, float] | list[str] | dict[str, float]) -> tuple[float, float]:
41
34
  """Create rescale parameters (scale and offset) based on the input rescale specification.
42
35
 
43
36
  Parameters
@@ -86,7 +79,7 @@ class Rescale(Forwards):
86
79
  """A class to apply rescaling to dataset variables."""
87
80
 
88
81
  def __init__(
89
- self, dataset: Dataset, rescale: Dict[str, Union[Tuple[float, float], List[str], Dict[str, float]]]
82
+ self, dataset: Dataset, rescale: dict[str, tuple[float, float] | list[str] | dict[str, float]]
90
83
  ) -> None:
91
84
  """Initialize the Rescale object.
92
85
 
@@ -129,7 +122,7 @@ class Rescale(Forwards):
129
122
  """
130
123
  return Node(self, [self.forward.tree()], rescale=self.rescale)
131
124
 
132
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
125
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
133
126
  """Get the metadata specific to the rescale subclass.
134
127
 
135
128
  Returns
@@ -204,7 +197,7 @@ class Rescale(Forwards):
204
197
  return data * self._a[0] + self._b[0]
205
198
 
206
199
  @cached_property
207
- def statistics(self) -> Dict[str, NDArray[Any]]:
200
+ def statistics(self) -> dict[str, NDArray[Any]]:
208
201
  """Get the statistics of the rescaled data."""
209
202
  result = {}
210
203
  a = self._a.squeeze()
@@ -224,7 +217,7 @@ class Rescale(Forwards):
224
217
 
225
218
  return result
226
219
 
227
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
220
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
228
221
  """Get the tendencies of the statistics of the rescaled data.
229
222
 
230
223
  Parameters
@@ -0,0 +1,141 @@
1
+ # (C) Copyright 2025 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ import logging
12
+ from functools import cached_property
13
+ from typing import Any
14
+
15
+ import numpy as np
16
+ from numpy.typing import NDArray
17
+
18
+ from anemoi.datasets.data.indexing import expand_list_indexing
19
+
20
+ from .dataset import Dataset
21
+ from .dataset import FullIndex
22
+ from .debug import Node
23
+ from .debug import debug_indexing
24
+ from .forwards import Forwards
25
+
26
+ LOG = logging.getLogger(__name__)
27
+
28
+
29
class RollingAverage(Forwards):
    """A dataset averaged over a rolling window along its first (dates) axis.

    Item ``k`` of this dataset is the NaN-ignoring mean (``np.nanmean``) of
    items ``k`` to ``k + i_start + i_end - 1`` of the wrapped dataset, so the
    result is shorter than the wrapped dataset by ``window length - 1`` items.
    """

    def __init__(self, dataset: Dataset, window: str | tuple[int, int, str]) -> None:
        """Initialize the RollingAverage class.

        Parameters
        ----------
        dataset : Dataset
            The dataset to be averaged with a rolling window.
        window : (int, int, str)
            The rolling average window (start, end, 'freq').
            'freq' means the window is in number of time steps in the dataset.
            Both start and end are inclusive, i.e. window = (-2, 2, 'freq') means a window of 5 time steps.
            For now, only 'freq' is supported, in the future other units may be supported.
            Windows such as "[-2h, +2h]" are not supported yet.

        Raises
        ------
        ValueError
            If ``window`` is not a 3-element list/tuple of (int, int, str), if
            the start is positive, or if the end is negative.
        NotImplementedError
            If the unit is anything other than 'freq' or 'frequency'.
        """
        super().__init__(dataset)
        if not (isinstance(window, (list, tuple)) and len(window) == 3):
            raise ValueError(f"Window must be (int, int, str), got {window}")

        start, end, unit = window
        if not (isinstance(start, int) and isinstance(end, int) and isinstance(unit, str)):
            raise ValueError(f"Window must be (int, int, str), got {window}")
        if unit not in ("freq", "frequency"):
            raise NotImplementedError(f"Window must be (int, int, 'freq'), got {window}")

        # i_start: number of steps before the reference step.
        # i_end: number of steps from the reference step to the end of the window (inclusive).
        # window = (0, 0, 'freq') gives i_start=0, i_end=1, i.e. a window of one step (no change).
        self.i_start = -start
        self.i_end = end + 1
        if self.i_start < 0:
            raise ValueError(f"Window start must be negative or zero, got {window}")
        if self.i_end <= 0:
            raise ValueError(f"Window end must not be negative, got {window}")

        self.window_str = f"-{self.i_start}-to-{self.i_end}"

    @property
    def shape(self):
        """Shape of the wrapped dataset with the first axis shortened to ``len(self)``."""
        return (len(self),) + tuple(self.forward.shape[1:])

    def _window(self, i: int) -> slice:
        """Return the slice of the wrapped dataset averaged to produce item ``i``.

        Negative indices count from the end; an index outside the dataset
        raises ``IndexError``.
        """
        length = len(self)
        j = int(i)
        if j < 0:
            j += length
        if not 0 <= j < length:
            raise IndexError(f"Index out of range: {i}")
        return slice(j, j + self.i_start + self.i_end)

    def _average(self, index: Any) -> NDArray[Any]:
        """Read ``index`` from the wrapped dataset and average over the first axis, ignoring NaNs."""
        return np.nanmean(self.forward[index], axis=0)

    @debug_indexing
    @expand_list_indexing
    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
        """Return the rolling average for an int, slice, or tuple index.

        For a tuple, the first element selects along the dates axis (int,
        slice, list or tuple) and the remaining elements are passed unchanged
        to the wrapped dataset.
        """
        if isinstance(n, slice):
            n = (n,)

        if not isinstance(n, tuple):
            # Explicit raise rather than assert: asserts are stripped under -O.
            if not isinstance(n, (int, np.integer)):
                raise TypeError(f"Expected int, slice, tuple, got {type(n)}")
            return self._average(self._window(n))

        first, rest = n[0], n[1:]

        if isinstance(first, (int, np.integer)):
            return self._average((self._window(first),) + rest)

        if isinstance(first, slice):
            # slice.indices() resolves None, negative and out-of-range bounds
            # the same way standard sequence slicing does.
            first = range(*first.indices(len(self)))

        if isinstance(first, (list, tuple, range)):
            return np.array([self._average((self._window(i),) + rest) for i in first])

        raise TypeError(f"Expected int, slice, list or tuple as first element of tuple, got {type(first)}")

    def __len__(self) -> int:
        """Number of items for which a complete window exists in the wrapped dataset."""
        return len(self.forward) - (self.i_end + self.i_start - 1)

    @cached_property
    def dates(self) -> NDArray[np.datetime64]:
        """Get the dates of the wrapped dataset for which a complete window is available."""
        dates = self.forward.dates
        return dates[self.i_start : len(dates) - self.i_end + 1]

    def tree(self) -> Node:
        """Return the tree node describing this dataset and its window."""
        return Node(self, [self.forward.tree()], window=self.window_str)

    @cached_property
    def missing(self) -> set[int]:
        """Get the missing data indices.

        Item ``k`` reads items ``k`` to ``k + width - 1`` of the wrapped
        dataset, so a missing item ``i`` there affects items
        ``i - width + 1`` to ``i`` here (clipped to the valid range).
        """
        width = self.i_start + self.i_end
        length = len(self)

        result: set[int] = set()
        for i in self.forward.missing:
            result.update(range(max(0, i - width + 1), min(i, length - 1) + 1))
        return result

    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
        """Return the metadata specific to this subclass (currently none)."""
        return {}
@@ -12,9 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import List
17
- from typing import Optional
18
15
 
19
16
  from numpy.typing import NDArray
20
17
 
@@ -37,7 +34,7 @@ LOG = logging.getLogger(__name__)
37
34
  class Select(Forwards):
38
35
  """Class to select a subset of variables from a dataset."""
39
36
 
40
- def __init__(self, dataset: Dataset, indices: List[int], reason: Dict[str, Any]) -> None:
37
+ def __init__(self, dataset: Dataset, indices: list[int], reason: dict[str, Any]) -> None:
41
38
  """Initialize the Select class.
42
39
 
43
40
  Parameters
@@ -140,26 +137,26 @@ class Select(Forwards):
140
137
  return (len(self), len(self.indices)) + self.dataset.shape[2:]
141
138
 
142
139
  @cached_property
143
- def variables(self) -> List[str]:
140
+ def variables(self) -> list[str]:
144
141
  """Get the variables of the dataset."""
145
142
  return [self.dataset.variables[i] for i in self.indices]
146
143
 
147
144
  @cached_property
148
- def variables_metadata(self) -> Dict[str, Any]:
145
+ def variables_metadata(self) -> dict[str, Any]:
149
146
  """Get the metadata of the variables."""
150
147
  return {k: v for k, v in self.dataset.variables_metadata.items() if k in self.variables}
151
148
 
152
149
  @cached_property
153
- def name_to_index(self) -> Dict[str, int]:
150
+ def name_to_index(self) -> dict[str, int]:
154
151
  """Get the mapping of variable names to indices."""
155
152
  return {k: i for i, k in enumerate(self.variables)}
156
153
 
157
154
  @cached_property
158
- def statistics(self) -> Dict[str, NDArray[Any]]:
155
+ def statistics(self) -> dict[str, NDArray[Any]]:
159
156
  """Get the statistics of the dataset."""
160
157
  return {k: v[self.indices] for k, v in self.dataset.statistics.items()}
161
158
 
162
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
159
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
163
160
  """Get the statistical tendencies of the dataset.
164
161
 
165
162
  Parameters
@@ -176,7 +173,7 @@ class Select(Forwards):
176
173
  delta = self.frequency
177
174
  return {k: v[self.indices] for k, v in self.dataset.statistics_tendencies(delta).items()}
178
175
 
179
- def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
176
+ def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
180
177
  """Get the specific metadata of the dataset.
181
178
 
182
179
  Parameters
@@ -216,7 +213,7 @@ class Select(Forwards):
216
213
  """
217
214
  return Node(self, [self.dataset.tree()], **self.reason)
218
215
 
219
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
216
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
220
217
  """Get the metadata specific to the subclass.
221
218
 
222
219
  Returns
@@ -231,7 +228,7 @@ class Select(Forwards):
231
228
  class Rename(Forwards):
232
229
  """Class to rename variables in a dataset."""
233
230
 
234
- def __init__(self, dataset: Dataset, rename: Dict[str, str]) -> None:
231
+ def __init__(self, dataset: Dataset, rename: dict[str, str]) -> None:
235
232
  """Initialize the Rename class.
236
233
 
237
234
  Parameters
@@ -251,17 +248,17 @@ class Rename(Forwards):
251
248
  self.rename = rename
252
249
 
253
250
  @property
254
- def variables(self) -> List[str]:
251
+ def variables(self) -> list[str]:
255
252
  """Get the renamed variables."""
256
253
  return self._variables
257
254
 
258
255
  @property
259
- def variables_metadata(self) -> Dict[str, Any]:
256
+ def variables_metadata(self) -> dict[str, Any]:
260
257
  """Get the renamed variables metadata."""
261
258
  return self._variables_metadata
262
259
 
263
260
  @cached_property
264
- def name_to_index(self) -> Dict[str, int]:
261
+ def name_to_index(self) -> dict[str, int]:
265
262
  """Get the mapping of renamed variable names to indices."""
266
263
  return {k: i for i, k in enumerate(self.variables)}
267
264
 
@@ -273,7 +270,7 @@ class Rename(Forwards):
273
270
  """
274
271
  return Node(self, [self.forward.tree()], rename=self.rename)
275
272
 
276
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
273
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
277
274
  """Get the metadata specific to the subclass.
278
275
 
279
276
  Returns:
@@ -12,9 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import Optional
17
- from typing import Set
18
15
 
19
16
  from numpy.typing import NDArray
20
17
 
@@ -56,11 +53,11 @@ class Statistics(Forwards):
56
53
  )
57
54
 
58
55
  @cached_property
59
- def statistics(self) -> Dict[str, NDArray[Any]]:
56
+ def statistics(self) -> dict[str, NDArray[Any]]:
60
57
  """Get the statistics."""
61
58
  return self._statistic.statistics
62
59
 
63
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
60
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
64
61
  """Get the statistics tendencies.
65
62
 
66
63
  Parameters
@@ -77,7 +74,7 @@ class Statistics(Forwards):
77
74
  delta = self.frequency
78
75
  return self._statistic.statistics_tendencies(delta)
79
76
 
80
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
77
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
81
78
  """Get the metadata specific to the forwards subclass.
82
79
 
83
80
  Returns
@@ -97,7 +94,7 @@ class Statistics(Forwards):
97
94
  """
98
95
  return Node(self, [self.forward.tree()])
99
96
 
100
- def get_dataset_names(self, names: Set[str]) -> None:
97
+ def get_dataset_names(self, names: set[str]) -> None:
101
98
  """Get the dataset names.
102
99
 
103
100
  Parameters
@@ -15,11 +15,6 @@ import tempfile
15
15
  import warnings
16
16
  from functools import cached_property
17
17
  from typing import Any
18
- from typing import Dict
19
- from typing import List
20
- from typing import Optional
21
- from typing import Set
22
- from typing import Union
23
18
  from urllib.parse import urlparse
24
19
 
25
20
  import numpy as np
@@ -90,22 +85,20 @@ class S3Store(ReadOnlyStore):
90
85
  options using the anemoi configs.
91
86
  """
92
87
 
93
- def __init__(self, url: str, region: Optional[str] = None) -> None:
94
- """Initialize the S3Store with a URL and optional region."""
95
- from anemoi.utils.remote.s3 import s3_client
88
+ def __init__(self, url: str) -> None:
89
+ """Initialize the S3Store with a URL."""
96
90
 
97
- _, _, self.bucket, self.key = url.split("/", 3)
98
- self.s3 = s3_client(self.bucket, region=region)
91
+ self.url = url
99
92
 
100
93
  def __getitem__(self, key: str) -> bytes:
101
94
  """Retrieve an item from the store."""
95
+ from anemoi.utils.remote.s3 import get_object
96
+
102
97
  try:
103
- response = self.s3.get_object(Bucket=self.bucket, Key=self.key + "/" + key)
104
- except self.s3.exceptions.NoSuchKey:
98
+ return get_object(os.path.join(self.url, key))
99
+ except FileNotFoundError:
105
100
  raise KeyError(key)
106
101
 
107
- return response["Body"].read()
108
-
109
102
 
110
103
  class DebugStore(ReadOnlyStore):
111
104
  """A store to debug the zarr loading."""
@@ -199,7 +192,7 @@ def open_zarr(path: str, dont_fail: bool = False, cache: int = None) -> zarr.hie
199
192
  class Zarr(Dataset):
200
193
  """A zarr dataset."""
201
194
 
202
- def __init__(self, path: Union[str, zarr.hierarchy.Group]) -> None:
195
+ def __init__(self, path: str | zarr.hierarchy.Group) -> None:
203
196
  """Initialize the Zarr dataset with a path or zarr group."""
204
197
  if isinstance(path, zarr.hierarchy.Group):
205
198
  self.was_zarr = True
@@ -215,7 +208,7 @@ class Zarr(Dataset):
215
208
  self._missing = set()
216
209
 
217
210
  @property
218
- def missing(self) -> Set[int]:
211
+ def missing(self) -> set[int]:
219
212
  """Return the missing dates of the dataset."""
220
213
  return self._missing
221
214
 
@@ -236,7 +229,7 @@ class Zarr(Dataset):
236
229
  """Retrieve an item from the dataset."""
237
230
  return self.data[n]
238
231
 
239
- def _unwind(self, index: Union[int, slice, list, tuple], rest: list, shape: tuple, axis: int, axes: list) -> iter:
232
+ def _unwind(self, index: int | slice | list | tuple, rest: list, shape: tuple, axis: int, axes: list) -> iter:
240
233
  """Unwind the index for multi-dimensional indexing."""
241
234
  if not isinstance(index, (int, slice, list, tuple)):
242
235
  try:
@@ -298,7 +291,7 @@ class Zarr(Dataset):
298
291
  return self.z.longitude[:]
299
292
 
300
293
  @property
301
- def statistics(self) -> Dict[str, NDArray[Any]]:
294
+ def statistics(self) -> dict[str, NDArray[Any]]:
302
295
  """Return the statistics of the dataset."""
303
296
  return dict(
304
297
  mean=self.z.mean[:],
@@ -307,7 +300,7 @@ class Zarr(Dataset):
307
300
  minimum=self.z.minimum[:],
308
301
  )
309
302
 
310
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
303
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
311
304
  """Return the statistical tendencies of the dataset."""
312
305
  if delta is None:
313
306
  delta = self.frequency
@@ -354,14 +347,14 @@ class Zarr(Dataset):
354
347
  return dates[1].astype(object) - dates[0].astype(object)
355
348
 
356
349
  @property
357
- def name_to_index(self) -> Dict[str, int]:
350
+ def name_to_index(self) -> dict[str, int]:
358
351
  """Return the name to index mapping of the dataset."""
359
352
  if "variables" in self.z.attrs:
360
353
  return {n: i for i, n in enumerate(self.z.attrs["variables"])}
361
354
  return self.z.attrs["name_to_index"]
362
355
 
363
356
  @property
364
- def variables(self) -> List[str]:
357
+ def variables(self) -> list[str]:
365
358
  """Return the variables of the dataset."""
366
359
  return [
367
360
  k
@@ -372,7 +365,7 @@ class Zarr(Dataset):
372
365
  ]
373
366
 
374
367
  @cached_property
375
- def constant_fields(self) -> List[str]:
368
+ def constant_fields(self) -> list[str]:
376
369
  """Return the constant fields of the dataset."""
377
370
  result = self.z.attrs.get("constant_fields")
378
371
  if result is None:
@@ -380,7 +373,7 @@ class Zarr(Dataset):
380
373
  return self.computed_constant_fields()
381
374
 
382
375
  @property
383
- def variables_metadata(self) -> Dict[str, Any]:
376
+ def variables_metadata(self) -> dict[str, Any]:
384
377
  """Return the metadata of the variables."""
385
378
  return self.z.attrs.get("variables_metadata", {})
386
379
 
@@ -392,7 +385,7 @@ class Zarr(Dataset):
392
385
  """Return the end date of the statistics."""
393
386
  return self.dates[-1]
394
387
 
395
- def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
388
+ def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
396
389
  """Return the specific metadata of the dataset."""
397
390
  return super().metadata_specific(
398
391
  attrs=dict(self.z.attrs),
@@ -416,7 +409,7 @@ class Zarr(Dataset):
416
409
  """Return the tree representation of the dataset."""
417
410
  return Node(self, [], path=self.path)
418
411
 
419
- def get_dataset_names(self, names: Set[str]) -> None:
412
+ def get_dataset_names(self, names: set[str]) -> None:
420
413
  """Get the names of the datasets."""
421
414
  name, _ = os.path.splitext(os.path.basename(self.path))
422
415
  names.add(name)
@@ -433,17 +426,17 @@ class Zarr(Dataset):
433
426
  class ZarrWithMissingDates(Zarr):
434
427
  """A zarr dataset with missing dates."""
435
428
 
436
- def __init__(self, path: Union[str, zarr.hierarchy.Group]) -> None:
429
+ def __init__(self, path: str | zarr.hierarchy.Group) -> None:
437
430
  """Initialize the ZarrWithMissingDates dataset with a path or zarr group."""
438
431
  super().__init__(path)
439
432
 
440
433
  missing_dates = self.z.attrs.get("missing_dates", [])
441
- missing_dates = set([np.datetime64(x, "s") for x in missing_dates])
434
+ missing_dates = {np.datetime64(x, "s") for x in missing_dates}
442
435
  self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
443
436
  self._missing = set(self.missing_to_dates)
444
437
 
445
438
  @property
446
- def missing(self) -> Set[int]:
439
+ def missing(self) -> set[int]:
447
440
  """Return the missing dates of the dataset."""
448
441
  return self._missing
449
442
 
@@ -506,7 +499,7 @@ class ZarrWithMissingDates(Zarr):
506
499
  QUIET = set()
507
500
 
508
501
 
509
- def zarr_lookup(name: str, fail: bool = True) -> Optional[str]:
502
+ def zarr_lookup(name: str, fail: bool = True) -> str | None:
510
503
  """Look up a zarr dataset by name."""
511
504
 
512
505
  config = load_config()["datasets"]