anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/grib-index.py +1 -1
  7. anemoi/datasets/commands/inspect.py +27 -35
  8. anemoi/datasets/commands/validate.py +59 -0
  9. anemoi/datasets/compute/recentre.py +3 -6
  10. anemoi/datasets/create/__init__.py +22 -25
  11. anemoi/datasets/create/check.py +10 -12
  12. anemoi/datasets/create/chunks.py +1 -2
  13. anemoi/datasets/create/config.py +3 -6
  14. anemoi/datasets/create/filter.py +21 -24
  15. anemoi/datasets/create/input/__init__.py +1 -2
  16. anemoi/datasets/create/input/action.py +3 -5
  17. anemoi/datasets/create/input/concat.py +5 -8
  18. anemoi/datasets/create/input/context.py +3 -6
  19. anemoi/datasets/create/input/data_sources.py +5 -8
  20. anemoi/datasets/create/input/empty.py +1 -2
  21. anemoi/datasets/create/input/filter.py +2 -3
  22. anemoi/datasets/create/input/function.py +1 -2
  23. anemoi/datasets/create/input/join.py +4 -5
  24. anemoi/datasets/create/input/misc.py +4 -6
  25. anemoi/datasets/create/input/repeated_dates.py +13 -18
  26. anemoi/datasets/create/input/result.py +29 -33
  27. anemoi/datasets/create/input/step.py +6 -24
  28. anemoi/datasets/create/input/template.py +3 -4
  29. anemoi/datasets/create/input/trace.py +1 -1
  30. anemoi/datasets/create/patch.py +1 -2
  31. anemoi/datasets/create/persistent.py +3 -5
  32. anemoi/datasets/create/size.py +1 -3
  33. anemoi/datasets/create/sources/accumulations.py +47 -52
  34. anemoi/datasets/create/sources/accumulations2.py +4 -8
  35. anemoi/datasets/create/sources/constants.py +1 -3
  36. anemoi/datasets/create/sources/empty.py +1 -2
  37. anemoi/datasets/create/sources/fdb.py +133 -0
  38. anemoi/datasets/create/sources/forcings.py +1 -2
  39. anemoi/datasets/create/sources/grib.py +6 -10
  40. anemoi/datasets/create/sources/grib_index.py +13 -15
  41. anemoi/datasets/create/sources/hindcasts.py +2 -5
  42. anemoi/datasets/create/sources/legacy.py +1 -1
  43. anemoi/datasets/create/sources/mars.py +17 -21
  44. anemoi/datasets/create/sources/netcdf.py +1 -2
  45. anemoi/datasets/create/sources/opendap.py +1 -3
  46. anemoi/datasets/create/sources/patterns.py +4 -6
  47. anemoi/datasets/create/sources/planetary_computer.py +44 -0
  48. anemoi/datasets/create/sources/recentre.py +8 -11
  49. anemoi/datasets/create/sources/source.py +3 -6
  50. anemoi/datasets/create/sources/tendencies.py +2 -5
  51. anemoi/datasets/create/sources/xarray.py +4 -6
  52. anemoi/datasets/create/sources/xarray_support/__init__.py +15 -32
  53. anemoi/datasets/create/sources/xarray_support/coordinates.py +16 -12
  54. anemoi/datasets/create/sources/xarray_support/field.py +17 -16
  55. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  56. anemoi/datasets/create/sources/xarray_support/flavour.py +83 -45
  57. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  58. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  59. anemoi/datasets/create/sources/xarray_support/patch.py +47 -6
  60. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  61. anemoi/datasets/create/sources/xarray_support/variable.py +27 -23
  62. anemoi/datasets/create/sources/xarray_zarr.py +1 -2
  63. anemoi/datasets/create/sources/zenodo.py +3 -5
  64. anemoi/datasets/create/statistics/__init__.py +3 -6
  65. anemoi/datasets/create/testing.py +2 -74
  66. anemoi/datasets/create/typing.py +1 -2
  67. anemoi/datasets/create/utils.py +1 -2
  68. anemoi/datasets/create/zarr.py +7 -2
  69. anemoi/datasets/data/__init__.py +15 -6
  70. anemoi/datasets/data/complement.py +52 -23
  71. anemoi/datasets/data/concat.py +5 -8
  72. anemoi/datasets/data/dataset.py +42 -47
  73. anemoi/datasets/data/debug.py +7 -9
  74. anemoi/datasets/data/ensemble.py +4 -6
  75. anemoi/datasets/data/fill_missing.py +7 -10
  76. anemoi/datasets/data/forwards.py +30 -28
  77. anemoi/datasets/data/grids.py +12 -16
  78. anemoi/datasets/data/indexing.py +9 -12
  79. anemoi/datasets/data/interpolate.py +7 -15
  80. anemoi/datasets/data/join.py +8 -12
  81. anemoi/datasets/data/masked.py +6 -11
  82. anemoi/datasets/data/merge.py +5 -9
  83. anemoi/datasets/data/misc.py +41 -45
  84. anemoi/datasets/data/missing.py +11 -16
  85. anemoi/datasets/data/observations/__init__.py +8 -14
  86. anemoi/datasets/data/padded.py +3 -5
  87. anemoi/datasets/data/records/backends/__init__.py +2 -2
  88. anemoi/datasets/data/rescale.py +5 -12
  89. anemoi/datasets/data/select.py +13 -16
  90. anemoi/datasets/data/statistics.py +4 -7
  91. anemoi/datasets/data/stores.py +23 -77
  92. anemoi/datasets/data/subset.py +8 -11
  93. anemoi/datasets/data/unchecked.py +7 -11
  94. anemoi/datasets/data/xy.py +25 -21
  95. anemoi/datasets/dates/__init__.py +13 -18
  96. anemoi/datasets/dates/groups.py +7 -10
  97. anemoi/datasets/grids.py +11 -12
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +5 -4
  101. anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filters/__init__.py +0 -33
  103. anemoi/datasets/create/filters/empty.py +0 -37
  104. anemoi/datasets/create/filters/legacy.py +0 -93
  105. anemoi/datasets/create/filters/noop.py +0 -37
  106. anemoi/datasets/create/filters/orog_to_z.py +0 -58
  107. anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
  108. anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
  109. anemoi/datasets/create/filters/rename.py +0 -205
  110. anemoi/datasets/create/filters/rotate_winds.py +0 -105
  111. anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
  112. anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
  113. anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
  114. anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
  115. anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
  116. anemoi/datasets/create/filters/sum.py +0 -68
  117. anemoi/datasets/create/filters/transform.py +0 -51
  118. anemoi/datasets/create/filters/unrotate_winds.py +0 -105
  119. anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
  120. anemoi/datasets/create/filters/wz_to_w.py +0 -98
  121. anemoi/datasets/utils/__init__.py +0 -8
  122. anemoi_datasets-0.5.25.dist-info/RECORD +0 -150
  123. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
  124. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
  125. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
  126. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
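
Most of the per-file hunks that follow apply the same mechanical modernisation of type hints: the `typing.Dict`, `List`, `Set`, `Tuple`, `Optional` and `Union` aliases are dropped in favour of the built-in generics (PEP 585) and the `X | Y` union syntax (PEP 604), which assume Python 3.9/3.10 or `from __future__ import annotations`. A minimal sketch of the pattern, using a hypothetical function rather than code from the package:

# Hypothetical example (not taken from anemoi-datasets): the same signature
# spelled with the removed typing aliases and with the built-in generics.
#
# Before:
#   from typing import Dict, List, Optional, Union
#   def pick(names: List[str], table: Dict[str, Union[int, float]], default: Optional[float] = None) -> Dict[str, Union[int, float, None]]:
#
# After:
def pick(names: list[str], table: dict[str, int | float], default: float | None = None) -> dict[str, int | float | None]:
    """Return the requested entries of `table`, falling back to `default` for missing names."""
    return {name: table.get(name, default) for name in names}
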
anemoi/datasets/data/missing.py

@@ -12,11 +12,6 @@ import datetime
  import logging
  from functools import cached_property
  from typing import Any
- from typing import Dict
- from typing import List
- from typing import Set
- from typing import Tuple
- from typing import Union

  import numpy as np
  from numpy.typing import NDArray
@@ -49,7 +44,7 @@ class MissingDates(Forwards):
  List of missing dates.
  """

- def __init__(self, dataset: Dataset, missing_dates: List[Union[int, str]]) -> None:
+ def __init__(self, dataset: Dataset, missing_dates: list[int | str]) -> None:
  """Initializes the MissingDates class.

  Parameters
@@ -80,13 +75,13 @@ class MissingDates(Forwards):
  self.missing_dates.append(date)

  n = self.forward._len
- self._missing = set(i for i in self._missing if 0 <= i < n)
+ self._missing = {i for i in self._missing if 0 <= i < n}
  self.missing_dates = sorted(to_datetime(x) for x in self.missing_dates)

  assert len(self._missing), "No dates to force missing"

  @cached_property
- def missing(self) -> Set[int]:
+ def missing(self) -> set[int]:
  """Returns the set of missing indices."""
  return self._missing.union(self.forward.missing)

@@ -148,7 +143,7 @@ class MissingDates(Forwards):
  raise MissingDateError(f"Date {self.forward.dates[n]} is missing (index={n})")

  @property
- def reason(self) -> Dict[str, Any]:
+ def reason(self) -> dict[str, Any]:
  """Provides the reason for missing dates."""
  return {"missing_dates": self.missing_dates}

@@ -162,7 +157,7 @@ class MissingDates(Forwards):
  """
  return Node(self, [self.forward.tree()], **self.reason)

- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
  """Provides metadata specific to the subclass.

  Returns
@@ -184,7 +179,7 @@ class SkipMissingDates(Forwards):
  The expected access pattern.
  """

- def __init__(self, dataset: Dataset, expected_access: Union[int, slice]) -> None:
+ def __init__(self, dataset: Dataset, expected_access: int | slice) -> None:
  """Initializes the SkipMissingDates class.

  Parameters
@@ -285,7 +280,7 @@ class SkipMissingDates(Forwards):
  return tuple(np.stack(_) for _ in result)

  @debug_indexing
- def _get_slice(self, s: slice) -> Tuple[NDArray[Any], ...]:
+ def _get_slice(self, s: slice) -> tuple[NDArray[Any], ...]:
  """Retrieves a slice of items.

  Parameters
@@ -303,7 +298,7 @@ class SkipMissingDates(Forwards):
  return tuple(np.stack(_) for _ in result)

  @debug_indexing
- def __getitem__(self, n: FullIndex) -> Tuple[NDArray[Any], ...]:
+ def __getitem__(self, n: FullIndex) -> tuple[NDArray[Any], ...]:
  """Retrieves the item at the given index.

  Parameters
@@ -339,7 +334,7 @@ class SkipMissingDates(Forwards):
  """
  return Node(self, [self.forward.tree()], expected_access=self.expected_access)

- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
  """Provides metadata specific to the subclass.

  Returns
@@ -404,7 +399,7 @@ class MissingDataset(Forwards):
  return self._dates

  @property
- def missing(self) -> Set[int]:
+ def missing(self) -> set[int]:
  """Returns the set of missing indices."""
  return self._missing

@@ -436,7 +431,7 @@ class MissingDataset(Forwards):
  """
  return Node(self, [self.forward.tree()], start=self.start, end=self.end)

- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
  """Provides metadata specific to the subclass.

  Returns
anemoi/datasets/data/observations/__init__.py

@@ -10,8 +10,6 @@ import logging
  import os
  from functools import cached_property
  from typing import Any
- from typing import Dict
- from typing import Tuple

  import numpy as np
  from anemoi.utils.dates import frequency_to_timedelta
@@ -82,10 +80,8 @@ class ObservationsBase(Dataset):
  # return [self.getitem(j) for j in i]

  raise ValueError(
- (
- f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
- "observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
- )
+ f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
+ "observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
  )

  @property
@@ -195,13 +191,11 @@ class ObservationsZarr(ObservationsBase):

  if len(self.forward) != len(self.dates):
  raise ValueError(
- (
- f"Dates are not consistent with the number of items in the dataset. "
- f"The dataset contains {len(self.forward)} time windows. "
- f"This is not compatible with the "
- f"{len(self.dates)} requested dates with frequency={frequency_hours}"
- f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
- )
+ f"Dates are not consistent with the number of items in the dataset. "
+ f"The dataset contains {len(self.forward)} time windows. "
+ f"This is not compatible with the "
+ f"{len(self.dates)} requested dates with frequency={frequency_hours}"
+ f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
  )

  @property
@@ -307,7 +301,7 @@ class ObservationsZarr(ObservationsBase):
  return f"Observations({os.path.basename(self.path)}, {self.dates[0]};{self.dates[-1]}, {len(self)})"


- def observations_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> ObservationsBase:
+ def observations_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> ObservationsBase:
  observations = kwargs.pop("observations")

  if not isinstance(observations, dict):
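
The ValueError wording above doubles as documentation of the supported access pattern: an observations dataset is indexed with a single integer to get one time window, and any finer selection happens with a second subscript on the returned array. A hedged sketch of that pattern (`ds` stands for an already-opened observations dataset, which is an assumption here; the slicing values are illustrative only):

# Sketch of the access pattern spelled out in the error message above.
import numpy as np

def first_column_of_window(ds, i: int) -> np.ndarray:
    window = ds[i]  # a single integer index returns the i-th time window
    # ds[0:2] or ds[[0, 1]] would raise the ValueError shown in the diff;
    # sub-selection is done with a second subscript instead: ds[i][a, b, c]
    return window[:, 0]
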
anemoi/datasets/data/padded.py

@@ -12,8 +12,6 @@ import datetime
  import logging
  from functools import cached_property
  from typing import Any
- from typing import Dict
- from typing import Set

  import numpy as np
  from anemoi.utils.dates import frequency_to_timedelta
@@ -38,7 +36,7 @@ class Padded(Forwards):
  _after: int = 0
  _inside: int = 0

- def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: Dict[str, Any]) -> None:
+ def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: dict[str, Any]) -> None:
  """Create a padded subset of a dataset.

  Attributes:
@@ -195,7 +193,7 @@ class Padded(Forwards):
  return (len(self.dates),) + self.dataset.shape[1:]

  @cached_property
- def missing(self) -> Set[int]:
+ def missing(self) -> set[int]:
  raise NotImplementedError("Need to decide whether to include the added dates as missing or not")
  # return self.forward.missing

@@ -207,7 +205,7 @@ class Padded(Forwards):
  """
  return Node(self, [self.dataset.tree()], **self.reason)

- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
  """Get the metadata specific to the forwards subclass.

  Returns:
anemoi/datasets/data/records/backends/__init__.py

@@ -35,7 +35,7 @@ class Npz1Backend(Backend):
  return dict(np.load(f))

  def read_metadata(self):
- with open(os.path.join(self.path, "metadata.json"), "r") as f:
+ with open(os.path.join(self.path, "metadata.json")) as f:
  return json.load(f)

  def read_statistics(self):
@@ -56,7 +56,7 @@ class Npz2Backend(Backend):
  return dict(np.load(f))

  def read_metadata(self):
- with open(os.path.join(self.path, "metadata.json"), "r") as f:
+ with open(os.path.join(self.path, "metadata.json")) as f:
  return json.load(f)

  def read_statistics(self):
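
The two backend hunks above only drop the redundant `"r"` argument: `open()` already defaults to read-only text mode, which is all `json.load` needs. For reference, a standalone equivalent of the updated `read_metadata` (the layout of `metadata.json` itself is not described in this diff):

import json
import os

def read_metadata(path: str) -> dict:
    # open() defaults to mode="r"; passing "r" explicitly was a no-op.
    with open(os.path.join(path, "metadata.json")) as f:
        return json.load(f)
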
anemoi/datasets/data/rescale.py

@@ -12,11 +12,6 @@ import datetime
  import logging
  from functools import cached_property
  from typing import Any
- from typing import Dict
- from typing import List
- from typing import Optional
- from typing import Tuple
- from typing import Union

  import numpy as np
  from numpy.typing import NDArray
@@ -35,9 +30,7 @@ from .indexing import update_tuple
  LOG = logging.getLogger(__name__)


- def make_rescale(
- variable: str, rescale: Union[Tuple[float, float], List[str], Dict[str, float]]
- ) -> Tuple[float, float]:
+ def make_rescale(variable: str, rescale: tuple[float, float] | list[str] | dict[str, float]) -> tuple[float, float]:
  """Create rescale parameters (scale and offset) based on the input rescale specification.

  Parameters
@@ -86,7 +79,7 @@ class Rescale(Forwards):
  """A class to apply rescaling to dataset variables."""

  def __init__(
- self, dataset: Dataset, rescale: Dict[str, Union[Tuple[float, float], List[str], Dict[str, float]]]
+ self, dataset: Dataset, rescale: dict[str, tuple[float, float] | list[str] | dict[str, float]]
  ) -> None:
  """Initialize the Rescale object.

@@ -129,7 +122,7 @@ class Rescale(Forwards):
  """
  return Node(self, [self.forward.tree()], rescale=self.rescale)

- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
  """Get the metadata specific to the rescale subclass.

  Returns
@@ -204,7 +197,7 @@ class Rescale(Forwards):
  return data * self._a[0] + self._b[0]

  @cached_property
- def statistics(self) -> Dict[str, NDArray[Any]]:
+ def statistics(self) -> dict[str, NDArray[Any]]:
  """Get the statistics of the rescaled data."""
  result = {}
  a = self._a.squeeze()
@@ -224,7 +217,7 @@ class Rescale(Forwards):

  return result

- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
  """Get the tendencies of the statistics of the rescaled data.

  Parameters
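
The new one-line `make_rescale` signature is also the clearest statement of the three accepted per-variable rescale spellings: a `(scale, offset)` pair, a list of unit strings, or a mapping of named values. A hedged usage sketch, assuming the `rescale` option of `open_dataset` that this module backs (the variable names, unit strings and dictionary keys below are illustrative, not taken from the package documentation):

from anemoi.datasets import open_dataset

ds = open_dataset(
    "dataset.zarr",  # hypothetical dataset name
    rescale={
        "2t": (1.0, -273.15),                    # tuple[float, float]: explicit scale and offset
        "msl": ["Pa", "hPa"],                    # list[str]: a unit conversion
        "tp": {"scale": 1000.0, "offset": 0.0},  # dict[str, float]: named parameters
    },
)
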
anemoi/datasets/data/select.py

@@ -12,9 +12,6 @@ import datetime
  import logging
  from functools import cached_property
  from typing import Any
- from typing import Dict
- from typing import List
- from typing import Optional

  from numpy.typing import NDArray

@@ -37,7 +34,7 @@ LOG = logging.getLogger(__name__)
  class Select(Forwards):
  """Class to select a subset of variables from a dataset."""

- def __init__(self, dataset: Dataset, indices: List[int], reason: Dict[str, Any]) -> None:
+ def __init__(self, dataset: Dataset, indices: list[int], reason: dict[str, Any]) -> None:
  """Initialize the Select class.

  Parameters
@@ -140,26 +137,26 @@ class Select(Forwards):
  return (len(self), len(self.indices)) + self.dataset.shape[2:]

  @cached_property
- def variables(self) -> List[str]:
+ def variables(self) -> list[str]:
  """Get the variables of the dataset."""
  return [self.dataset.variables[i] for i in self.indices]

  @cached_property
- def variables_metadata(self) -> Dict[str, Any]:
+ def variables_metadata(self) -> dict[str, Any]:
  """Get the metadata of the variables."""
  return {k: v for k, v in self.dataset.variables_metadata.items() if k in self.variables}

  @cached_property
- def name_to_index(self) -> Dict[str, int]:
+ def name_to_index(self) -> dict[str, int]:
  """Get the mapping of variable names to indices."""
  return {k: i for i, k in enumerate(self.variables)}

  @cached_property
- def statistics(self) -> Dict[str, NDArray[Any]]:
+ def statistics(self) -> dict[str, NDArray[Any]]:
  """Get the statistics of the dataset."""
  return {k: v[self.indices] for k, v in self.dataset.statistics.items()}

- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
  """Get the statistical tendencies of the dataset.

  Parameters
@@ -176,7 +173,7 @@ class Select(Forwards):
  delta = self.frequency
  return {k: v[self.indices] for k, v in self.dataset.statistics_tendencies(delta).items()}

- def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
+ def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
  """Get the specific metadata of the dataset.

  Parameters
@@ -216,7 +213,7 @@ class Select(Forwards):
  """
  return Node(self, [self.dataset.tree()], **self.reason)

- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
  """Get the metadata specific to the subclass.

  Returns
@@ -231,7 +228,7 @@ class Select(Forwards):
  class Rename(Forwards):
  """Class to rename variables in a dataset."""

- def __init__(self, dataset: Dataset, rename: Dict[str, str]) -> None:
+ def __init__(self, dataset: Dataset, rename: dict[str, str]) -> None:
  """Initialize the Rename class.

  Parameters
@@ -251,17 +248,17 @@ class Rename(Forwards):
  self.rename = rename

  @property
- def variables(self) -> List[str]:
+ def variables(self) -> list[str]:
  """Get the renamed variables."""
  return self._variables

  @property
- def variables_metadata(self) -> Dict[str, Any]:
+ def variables_metadata(self) -> dict[str, Any]:
  """Get the renamed variables metadata."""
  return self._variables_metadata

  @cached_property
- def name_to_index(self) -> Dict[str, int]:
+ def name_to_index(self) -> dict[str, int]:
  """Get the mapping of renamed variable names to indices."""
  return {k: i for i, k in enumerate(self.variables)}

@@ -273,7 +270,7 @@ class Rename(Forwards):
  """
  return Node(self, [self.forward.tree()], rename=self.rename)

- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
  """Get the metadata specific to the subclass.

  Returns:
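
`Select` and `Rename` are the wrappers behind variable subsetting and renaming when a dataset is opened. A hedged usage sketch, assuming the `select` and `rename` options of `open_dataset` (the variable names are illustrative only):

from anemoi.datasets import open_dataset

ds = open_dataset("dataset.zarr", select=["2t", "10u", "10v"], rename={"2t": "t2m"})
print(ds.variables)      # list[str] of the selected (and renamed) variables
print(ds.name_to_index)  # dict[str, int], rebuilt as {k: i for i, k in enumerate(variables)}
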
anemoi/datasets/data/statistics.py

@@ -12,9 +12,6 @@ import datetime
  import logging
  from functools import cached_property
  from typing import Any
- from typing import Dict
- from typing import Optional
- from typing import Set

  from numpy.typing import NDArray

@@ -56,11 +53,11 @@ class Statistics(Forwards):
  )

  @cached_property
- def statistics(self) -> Dict[str, NDArray[Any]]:
+ def statistics(self) -> dict[str, NDArray[Any]]:
  """Get the statistics."""
  return self._statistic.statistics

- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
  """Get the statistics tendencies.

  Parameters
@@ -77,7 +74,7 @@ class Statistics(Forwards):
  delta = self.frequency
  return self._statistic.statistics_tendencies(delta)

- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
  """Get the metadata specific to the forwards subclass.

  Returns
@@ -97,7 +94,7 @@ class Statistics(Forwards):
  """
  return Node(self, [self.forward.tree()])

- def get_dataset_names(self, names: Set[str]) -> None:
+ def get_dataset_names(self, names: set[str]) -> None:
  """Get the dataset names.

  Parameters
anemoi/datasets/data/stores.py

@@ -15,11 +15,6 @@ import tempfile
  import warnings
  from functools import cached_property
  from typing import Any
- from typing import Dict
- from typing import List
- from typing import Optional
- from typing import Set
- from typing import Union
  from urllib.parse import urlparse

  import numpy as np
@@ -90,7 +85,7 @@ class S3Store(ReadOnlyStore):
  options using the anemoi configs.
  """

- def __init__(self, url: str, region: Optional[str] = None) -> None:
+ def __init__(self, url: str, region: str | None = None) -> None:
  """Initialize the S3Store with a URL and optional region."""
  from anemoi.utils.remote.s3 import s3_client

@@ -107,51 +102,6 @@ class S3Store(ReadOnlyStore):
  return response["Body"].read()


- class PlanetaryComputerStore(ReadOnlyStore):
- """We write our own Store to access catalogs on Planetary Computer,
- as it requires some extra arguments to use xr.open_zarr.
- """
-
- def __init__(self, data_catalog_id: str) -> None:
- """Initialize the PlanetaryComputerStore with a data catalog ID.
-
- Parameters
- ----------
- data_catalog_id : str
- The data catalog ID.
- """
- self.data_catalog_id = data_catalog_id
-
- import planetary_computer
- import pystac_client
-
- catalog = pystac_client.Client.open(
- "https://planetarycomputer.microsoft.com/api/stac/v1/",
- modifier=planetary_computer.sign_inplace,
- )
- collection = catalog.get_collection(self.data_catalog_id)
-
- asset = collection.assets["zarr-abfs"]
-
- if "xarray:storage_options" in asset.extra_fields:
- store = {
- "store": asset.href,
- "storage_options": asset.extra_fields["xarray:storage_options"],
- **asset.extra_fields["xarray:open_kwargs"],
- }
- else:
- store = {
- "filename_or_obj": asset.href,
- **asset.extra_fields["xarray:open_kwargs"],
- }
-
- self.store = store
-
- def __getitem__(self, key: str) -> bytes:
- """Retrieve an item from the store."""
- raise NotImplementedError()
-
-
  class DebugStore(ReadOnlyStore):
  """A store to debug the zarr loading."""

@@ -190,11 +140,11 @@ def name_to_zarr_store(path_or_url: str) -> ReadOnlyStore:

  if store.startswith("http://") or store.startswith("https://"):

- parsed = urlparse(store)
-
  if store.endswith(".zip"):
  import multiurl

+ parsed = urlparse(store)
+
  # Zarr cannot handle zip files over HTTP
  tmpdir = tempfile.gettempdir()
  name = os.path.basename(parsed.path)
@@ -210,15 +160,7 @@ def name_to_zarr_store(path_or_url: str) -> ReadOnlyStore:
  os.rename(path + ".tmp", path)
  return name_to_zarr_store(path)

- bits = parsed.netloc.split(".")
- if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
- s3_url = f"s3://{bits[0]}{parsed.path}"
- store = S3Store(s3_url, region=bits[2])
- elif store.startswith("https://planetarycomputer.microsoft.com/"):
- data_catalog_id = store.rsplit("/", 1)[-1]
- store = PlanetaryComputerStore(data_catalog_id).store
- else:
- store = HTTPStore(store)
+ return HTTPStore(store)

  return store

@@ -252,7 +194,7 @@ def open_zarr(path: str, dont_fail: bool = False, cache: int = None) -> zarr.hie
  class Zarr(Dataset):
  """A zarr dataset."""

- def __init__(self, path: Union[str, zarr.hierarchy.Group]) -> None:
+ def __init__(self, path: str | zarr.hierarchy.Group) -> None:
  """Initialize the Zarr dataset with a path or zarr group."""
  if isinstance(path, zarr.hierarchy.Group):
  self.was_zarr = True
@@ -268,7 +210,7 @@ class Zarr(Dataset):
  self._missing = set()

  @property
- def missing(self) -> Set[int]:
+ def missing(self) -> set[int]:
  """Return the missing dates of the dataset."""
  return self._missing

@@ -289,7 +231,7 @@ class Zarr(Dataset):
  """Retrieve an item from the dataset."""
  return self.data[n]

- def _unwind(self, index: Union[int, slice, list, tuple], rest: list, shape: tuple, axis: int, axes: list) -> iter:
+ def _unwind(self, index: int | slice | list | tuple, rest: list, shape: tuple, axis: int, axes: list) -> iter:
  """Unwind the index for multi-dimensional indexing."""
  if not isinstance(index, (int, slice, list, tuple)):
  try:
@@ -351,7 +293,7 @@ class Zarr(Dataset):
  return self.z.longitude[:]

  @property
- def statistics(self) -> Dict[str, NDArray[Any]]:
+ def statistics(self) -> dict[str, NDArray[Any]]:
  """Return the statistics of the dataset."""
  return dict(
  mean=self.z.mean[:],
@@ -360,7 +302,7 @@ class Zarr(Dataset):
  minimum=self.z.minimum[:],
  )

- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
  """Return the statistical tendencies of the dataset."""
  if delta is None:
  delta = self.frequency
@@ -407,14 +349,14 @@ class Zarr(Dataset):
  return dates[1].astype(object) - dates[0].astype(object)

  @property
- def name_to_index(self) -> Dict[str, int]:
+ def name_to_index(self) -> dict[str, int]:
  """Return the name to index mapping of the dataset."""
  if "variables" in self.z.attrs:
  return {n: i for i, n in enumerate(self.z.attrs["variables"])}
  return self.z.attrs["name_to_index"]

  @property
- def variables(self) -> List[str]:
+ def variables(self) -> list[str]:
  """Return the variables of the dataset."""
  return [
  k
@@ -425,7 +367,7 @@ class Zarr(Dataset):
  ]

  @cached_property
- def constant_fields(self) -> List[str]:
+ def constant_fields(self) -> list[str]:
  """Return the constant fields of the dataset."""
  result = self.z.attrs.get("constant_fields")
  if result is None:
@@ -433,7 +375,7 @@ class Zarr(Dataset):
  return self.computed_constant_fields()

  @property
- def variables_metadata(self) -> Dict[str, Any]:
+ def variables_metadata(self) -> dict[str, Any]:
  """Return the metadata of the variables."""
  return self.z.attrs.get("variables_metadata", {})

@@ -445,7 +387,7 @@ class Zarr(Dataset):
  """Return the end date of the statistics."""
  return self.dates[-1]

- def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
+ def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
  """Return the specific metadata of the dataset."""
  return super().metadata_specific(
  attrs=dict(self.z.attrs),
@@ -469,7 +411,7 @@ class Zarr(Dataset):
  """Return the tree representation of the dataset."""
  return Node(self, [], path=self.path)

- def get_dataset_names(self, names: Set[str]) -> None:
+ def get_dataset_names(self, names: set[str]) -> None:
  """Get the names of the datasets."""
  name, _ = os.path.splitext(os.path.basename(self.path))
  names.add(name)
@@ -486,17 +428,17 @@ class ZarrWithMissingDates(Zarr):
  class ZarrWithMissingDates(Zarr):
  """A zarr dataset with missing dates."""

- def __init__(self, path: Union[str, zarr.hierarchy.Group]) -> None:
+ def __init__(self, path: str | zarr.hierarchy.Group) -> None:
  """Initialize the ZarrWithMissingDates dataset with a path or zarr group."""
  super().__init__(path)

  missing_dates = self.z.attrs.get("missing_dates", [])
- missing_dates = set([np.datetime64(x, "s") for x in missing_dates])
+ missing_dates = {np.datetime64(x, "s") for x in missing_dates}
  self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
  self._missing = set(self.missing_to_dates)

  @property
- def missing(self) -> Set[int]:
+ def missing(self) -> set[int]:
  """Return the missing dates of the dataset."""
  return self._missing

@@ -559,12 +501,16 @@ class ZarrWithMissingDates(Zarr):
  QUIET = set()


- def zarr_lookup(name: str, fail: bool = True) -> Optional[str]:
+ def zarr_lookup(name: str, fail: bool = True) -> str | None:
  """Look up a zarr dataset by name."""

  config = load_config()["datasets"]
  use_search_path_not_found = config.get("use_search_path_not_found", False)

+ if name.endswith(".zarr/"):
+ LOG.warning("Removing trailing slash from path: %s", name)
+ name = name[:-1]
+
  if name.endswith(".zarr") or name.endswith(".zip"):

  if os.path.exists(name):
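
Two stores.py changes above deserve a note. First, `zarr_lookup` now strips a trailing slash from `*.zarr/` paths (with a warning) before the extension and existence checks, so a path copied with a trailing slash resolves the same way as one without. Second, the `PlanetaryComputerStore` class and its dispatch in `name_to_zarr_store` are removed, while the file list shows a new `anemoi/datasets/create/sources/planetary_computer.py` (+44), so the Planetary Computer handling appears to move from the read-time store layer to a dataset-creation source. The deleted code itself documents the access pattern it used; a condensed, hedged sketch follows, using the same `pystac_client`/`planetary_computer` calls as the removed class but wrapped in a hypothetical helper:

# Condensed from the deleted PlanetaryComputerStore: resolve a Planetary Computer
# collection id to the keyword arguments needed to open its "zarr-abfs" asset
# with xarray. The helper name is hypothetical, not part of the package.
import planetary_computer
import pystac_client


def planetary_computer_open_kwargs(data_catalog_id: str) -> dict:
    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1/",
        modifier=planetary_computer.sign_inplace,  # signs asset URLs in place
    )
    asset = catalog.get_collection(data_catalog_id).assets["zarr-abfs"]

    if "xarray:storage_options" in asset.extra_fields:
        # Keyword arguments for xr.open_zarr(store=..., storage_options=...)
        return {
            "store": asset.href,
            "storage_options": asset.extra_fields["xarray:storage_options"],
            **asset.extra_fields["xarray:open_kwargs"],
        }
    # Otherwise, keyword arguments for xr.open_dataset(filename_or_obj=...)
    return {
        "filename_or_obj": asset.href,
        **asset.extra_fields["xarray:open_kwargs"],
    }
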