anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/validate.py +59 -0
  8. anemoi/datasets/compute/recentre.py +3 -6
  9. anemoi/datasets/create/__init__.py +22 -25
  10. anemoi/datasets/create/check.py +10 -12
  11. anemoi/datasets/create/chunks.py +1 -2
  12. anemoi/datasets/create/config.py +3 -6
  13. anemoi/datasets/create/filter.py +1 -2
  14. anemoi/datasets/create/input/__init__.py +1 -2
  15. anemoi/datasets/create/input/action.py +3 -5
  16. anemoi/datasets/create/input/concat.py +5 -8
  17. anemoi/datasets/create/input/context.py +3 -6
  18. anemoi/datasets/create/input/data_sources.py +5 -8
  19. anemoi/datasets/create/input/empty.py +1 -2
  20. anemoi/datasets/create/input/filter.py +2 -3
  21. anemoi/datasets/create/input/function.py +1 -2
  22. anemoi/datasets/create/input/join.py +4 -5
  23. anemoi/datasets/create/input/misc.py +4 -6
  24. anemoi/datasets/create/input/repeated_dates.py +13 -18
  25. anemoi/datasets/create/input/result.py +29 -33
  26. anemoi/datasets/create/input/step.py +4 -8
  27. anemoi/datasets/create/input/template.py +3 -4
  28. anemoi/datasets/create/input/trace.py +1 -1
  29. anemoi/datasets/create/patch.py +1 -2
  30. anemoi/datasets/create/persistent.py +3 -5
  31. anemoi/datasets/create/size.py +1 -3
  32. anemoi/datasets/create/sources/accumulations.py +47 -52
  33. anemoi/datasets/create/sources/accumulations2.py +4 -8
  34. anemoi/datasets/create/sources/constants.py +1 -3
  35. anemoi/datasets/create/sources/empty.py +1 -2
  36. anemoi/datasets/create/sources/fdb.py +133 -0
  37. anemoi/datasets/create/sources/forcings.py +1 -2
  38. anemoi/datasets/create/sources/grib.py +6 -10
  39. anemoi/datasets/create/sources/grib_index.py +13 -15
  40. anemoi/datasets/create/sources/hindcasts.py +2 -5
  41. anemoi/datasets/create/sources/legacy.py +1 -1
  42. anemoi/datasets/create/sources/mars.py +17 -21
  43. anemoi/datasets/create/sources/netcdf.py +1 -2
  44. anemoi/datasets/create/sources/opendap.py +1 -3
  45. anemoi/datasets/create/sources/patterns.py +4 -6
  46. anemoi/datasets/create/sources/recentre.py +8 -11
  47. anemoi/datasets/create/sources/source.py +3 -6
  48. anemoi/datasets/create/sources/tendencies.py +2 -5
  49. anemoi/datasets/create/sources/xarray.py +4 -6
  50. anemoi/datasets/create/sources/xarray_support/__init__.py +12 -13
  51. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  52. anemoi/datasets/create/sources/xarray_support/field.py +16 -12
  53. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  54. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  55. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  56. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  57. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  58. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  59. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  60. anemoi/datasets/create/sources/xarray_zarr.py +1 -2
  61. anemoi/datasets/create/sources/zenodo.py +3 -5
  62. anemoi/datasets/create/statistics/__init__.py +3 -6
  63. anemoi/datasets/create/testing.py +4 -0
  64. anemoi/datasets/create/typing.py +1 -2
  65. anemoi/datasets/create/utils.py +1 -2
  66. anemoi/datasets/create/zarr.py +7 -2
  67. anemoi/datasets/data/__init__.py +15 -6
  68. anemoi/datasets/data/complement.py +7 -12
  69. anemoi/datasets/data/concat.py +5 -8
  70. anemoi/datasets/data/dataset.py +42 -47
  71. anemoi/datasets/data/debug.py +7 -9
  72. anemoi/datasets/data/ensemble.py +4 -6
  73. anemoi/datasets/data/fill_missing.py +7 -10
  74. anemoi/datasets/data/forwards.py +22 -26
  75. anemoi/datasets/data/grids.py +12 -16
  76. anemoi/datasets/data/indexing.py +9 -12
  77. anemoi/datasets/data/interpolate.py +7 -15
  78. anemoi/datasets/data/join.py +8 -12
  79. anemoi/datasets/data/masked.py +6 -11
  80. anemoi/datasets/data/merge.py +5 -9
  81. anemoi/datasets/data/misc.py +41 -45
  82. anemoi/datasets/data/missing.py +11 -16
  83. anemoi/datasets/data/observations/__init__.py +8 -14
  84. anemoi/datasets/data/padded.py +3 -5
  85. anemoi/datasets/data/records/backends/__init__.py +2 -2
  86. anemoi/datasets/data/rescale.py +5 -12
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +16 -21
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +13 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/grids.py +5 -9
  96. anemoi/datasets/testing.py +93 -7
  97. anemoi/datasets/validate.py +598 -0
  98. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +4 -4
  99. anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
  100. anemoi/datasets/utils/__init__.py +0 -8
  101. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  102. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
  103. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
  104. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
  105. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
@@ -15,11 +15,6 @@ import os
15
15
  from pathlib import PurePath
16
16
  from typing import TYPE_CHECKING
17
17
  from typing import Any
18
- from typing import Dict
19
- from typing import List
20
- from typing import Optional
21
- from typing import Tuple
22
- from typing import Union
23
18
 
24
19
  import numpy as np
25
20
  import zarr
@@ -33,7 +28,7 @@ if TYPE_CHECKING:
33
28
  LOG = logging.getLogger(__name__)
34
29
 
35
30
 
36
- def load_config() -> Dict[str, Any]:
31
+ def load_config() -> dict[str, Any]:
37
32
  """Load the configuration settings.
38
33
 
39
34
  Returns
@@ -110,10 +105,10 @@ def round_datetime(d: np.datetime64, dates: NDArray[np.datetime64], up: bool) ->
110
105
 
111
106
 
112
107
  def _as_date(
113
- d: Union[int, str, np.datetime64, datetime.date],
108
+ d: int | str | np.datetime64 | datetime.date,
114
109
  dates: NDArray[np.datetime64],
115
110
  last: bool,
116
- frequency: Optional[datetime.timedelta] = None,
111
+ frequency: datetime.timedelta | None = None,
117
112
  ) -> np.datetime64:
118
113
  """Convert a date to a numpy datetime64 object, rounding to the nearest date in a list of dates.
119
114
 
@@ -221,8 +216,8 @@ def _as_date(
221
216
 
222
217
  if "-" in d and ":" in d:
223
218
  date, time = d.replace(" ", "T").split("T")
224
- year, month, day = [int(_) for _ in date.split("-")]
225
- hour, minute, second = [int(_) for _ in time.split(":")]
219
+ year, month, day = (int(_) for _ in date.split("-"))
220
+ hour, minute, second = (int(_) for _ in time.split(":"))
226
221
  return _as_date(
227
222
  np.datetime64(f"{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}"),
228
223
  dates,
@@ -258,9 +253,9 @@ def _as_date(
258
253
 
259
254
 
260
255
  def as_first_date(
261
- d: Union[int, str, np.datetime64, datetime.date],
256
+ d: int | str | np.datetime64 | datetime.date,
262
257
  dates: NDArray[np.datetime64],
263
- frequency: Optional[datetime.timedelta] = None,
258
+ frequency: datetime.timedelta | None = None,
264
259
  ) -> np.datetime64:
265
260
  """Convert a date to the first date in a list of dates.
266
261
 
@@ -282,9 +277,9 @@ def as_first_date(
282
277
 
283
278
 
284
279
  def as_last_date(
285
- d: Union[int, str, np.datetime64, datetime.date],
280
+ d: int | str | np.datetime64 | datetime.date,
286
281
  dates: NDArray[np.datetime64],
287
- frequency: Optional[datetime.timedelta] = None,
282
+ frequency: datetime.timedelta | None = None,
288
283
  ) -> np.datetime64:
289
284
  """Convert a date to the last date in a list of dates.
290
285
 
@@ -305,7 +300,7 @@ def as_last_date(
305
300
  return _as_date(d, dates, last=True, frequency=frequency)
306
301
 
307
302
 
308
- def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple["Dataset", Dict[str, Any]]:
303
+ def _concat_or_join(datasets: list["Dataset"], kwargs: dict[str, Any]) -> tuple["Dataset", dict[str, Any]]:
309
304
  """Concatenate or join datasets based on their date ranges.
310
305
 
311
306
  Parameters
@@ -317,7 +312,7 @@ def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple[
317
312
 
318
313
  Returns
319
314
  -------
320
- Tuple[Dataset, Dict[str, Any]]
315
+ tuple[Dataset, dict[str, Any]]
321
316
  The concatenated or joined dataset and remaining arguments.
322
317
  """
323
318
  if "adjust" in kwargs:
@@ -339,12 +334,12 @@ def _concat_or_join(datasets: List["Dataset"], kwargs: Dict[str, Any]) -> Tuple[
339
334
  return Concat(datasets), kwargs
340
335
 
341
336
 
342
- def _open(a: Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]) -> "Dataset":
337
+ def _open(a: str | PurePath | dict[str, Any] | list[Any] | tuple[Any, ...]) -> "Dataset":
343
338
  """Open a dataset from various input types.
344
339
 
345
340
  Parameters
346
341
  ----------
347
- a : Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]
342
+ a : str | PurePath | dict[str, Any] | list[Any] | tuple[Any, ...]
348
343
  The input to open.
349
344
 
350
345
  Returns
@@ -390,10 +385,10 @@ def _open(a: Union[str, PurePath, Dict[str, Any], List[Any], Tuple[Any, ...]]) -
390
385
 
391
386
 
392
387
  def _auto_adjust(
393
- datasets: List["Dataset"],
394
- kwargs: Dict[str, Any],
395
- exclude: Optional[List[str]] = None,
396
- ) -> Tuple[List["Dataset"], Dict[str, Any]]:
388
+ datasets: list["Dataset"],
389
+ kwargs: dict[str, Any],
390
+ exclude: list[str] | None = None,
391
+ ) -> tuple[list["Dataset"], dict[str, Any]]:
397
392
  """Automatically adjust datasets based on specified criteria.
398
393
 
399
394
  Parameters
@@ -407,7 +402,7 @@ def _auto_adjust(
407
402
 
408
403
  Returns
409
404
  -------
410
- Tuple[List[Dataset], Dict[str, Any]]
405
+ tuple[list[Dataset], dict[str, Any]]
411
406
  The adjusted datasets and remaining arguments.
412
407
  """
413
408
  if "adjust" not in kwargs:
@@ -620,7 +615,7 @@ def append_to_zarr(new_data: np.ndarray, new_dates: np.ndarray, zarr_path: str)
620
615
  # Re-open the zarr store to avoid root object accumulating memory.
621
616
  root = zarr.open(zarr_path, mode="a")
622
617
  # Normalise new dates to a numpy datetime64 array regardless of input dtype.
623
- new_dates = np.array(new_dates, dtype="datetime64[ns]")
618
+ new_dates = np.array(new_dates, dtype="datetime64[s]")
624
619
  dates_ds = root["dates"]
625
620
  old_len = dates_ds.shape[0]
626
621
  dates_ds.resize((old_len + len(new_dates),))
@@ -633,19 +628,19 @@ def append_to_zarr(new_data: np.ndarray, new_dates: np.ndarray, zarr_path: str)
633
628
  data_ds[old_shape[0] :] = new_data
634
629
 
635
630
 
636
- def process_date(date: Any, big_dataset: Any) -> Tuple[np.ndarray, np.ndarray]:
631
+ def process_date(date: Any, big_dataset: "Dataset") -> tuple[np.ndarray, np.ndarray]:
637
632
  """Open the subset corresponding to the given date and return (date, subset).
638
633
 
639
634
  Parameters
640
635
  ----------
641
636
  date : Any
642
637
  The date to process.
643
- big_dataset : Any
638
+ big_dataset : Dataset
644
639
  The dataset to process.
645
640
 
646
641
  Returns
647
642
  -------
648
- Tuple[np.ndarray, np.ndarray]
643
+ tuple[np.ndarray, np.ndarray]
649
644
  The subset and the date.
650
645
  """
651
646
  print("Processing:", date, flush=True)
@@ -655,26 +650,24 @@ def process_date(date: Any, big_dataset: Any) -> Tuple[np.ndarray, np.ndarray]:
655
650
  return s, date
656
651
 
657
652
 
658
- def initialize_zarr_store(root: Any, big_dataset: Any, recipe: Dict[str, Any]) -> None:
653
+ def initialize_zarr_store(root: Any, big_dataset: "Dataset") -> None:
659
654
  """Initialize the Zarr store with the given dataset and recipe.
660
655
 
661
656
  Parameters
662
657
  ----------
663
658
  root : Any
664
- The root of the Zarr store.
665
- big_dataset : Any
659
+ The root Zarr store.
660
+ big_dataset : Dataset
666
661
  The dataset to initialize the store with.
667
- recipe : Dict[str, Any]
668
- The recipe for initializing the store.
669
662
  """
670
- ensembles = big_dataset.shape[1]
663
+ ensembles = big_dataset.shape[2]
671
664
  # Create or append to "dates" dataset.
672
665
  if "dates" not in root:
673
666
  full_length = len(big_dataset.dates)
674
667
  root.create_dataset("dates", data=np.array([], dtype="datetime64[s]"), chunks=(full_length,))
675
668
 
676
669
  if "data" not in root:
677
- dims = (1, len(big_dataset.variables), ensembles, big_dataset.grids[0])
670
+ dims = (1, len(big_dataset.variables), ensembles, big_dataset.shape[-1])
678
671
  root.create_dataset(
679
672
  "data",
680
673
  shape=dims,
@@ -694,25 +687,28 @@ def initialize_zarr_store(root: Any, big_dataset: Any, recipe: Dict[str, Any]) -
694
687
  if "latitudes" not in root or "longitudes" not in root:
695
688
  root.create_dataset("latitudes", data=big_dataset.latitudes, compressor=None)
696
689
  root.create_dataset("longitudes", data=big_dataset.longitudes, compressor=None)
697
-
690
+ for k, v in big_dataset.metadata().items():
691
+ if k not in root.attrs:
692
+ root.attrs[k] = v
698
693
  # Set store-wide attributes if not already set.
699
- if "frequency" not in root.attrs:
700
- root.attrs["frequency"] = "10m"
701
- root.attrs["resolution"] = "1km"
694
+ if "first_date" not in root.attrs:
695
+ root.attrs["first_date"] = big_dataset.metadata()["start_date"]
696
+ root.attrs["last_date"] = big_dataset.metadata()["end_date"]
697
+ root.attrs["resolution"] = big_dataset.resolution
702
698
  root.attrs["name_to_index"] = {k: i for i, k in enumerate(big_dataset.variables)}
703
- root.attrs["ensemble_dimension"] = 1
699
+ root.attrs["ensemble_dimension"] = 2
704
700
  root.attrs["field_shape"] = big_dataset.field_shape
705
701
  root.attrs["flatten_grid"] = True
706
- root.attrs["recipe"] = recipe
702
+ root.attrs["recipe"] = {}
707
703
 
708
704
 
709
- def _save_dataset(recipe: Dict[str, Any], zarr_path: str, n_workers: int = 1) -> None:
705
+ def _save_dataset(dataset: "Dataset", zarr_path: str, n_workers: int = 1) -> None:
710
706
  """Incrementally create (or update) a Zarr store from an Anemoi dataset.
711
707
 
712
708
  Parameters
713
709
  ----------
714
- recipe : Dict[str, Any]
715
- The recipe for creating the dataset.
710
+ dataset : Dataset
711
+ anemoi-dataset opened from python to save to Zarr store
716
712
  zarr_path : str
717
713
  The path to the Zarr store.
718
714
  n_workers : int, optional
@@ -728,13 +724,13 @@ def _save_dataset(recipe: Dict[str, Any], zarr_path: str, n_workers: int = 1) ->
728
724
  """
729
725
  from concurrent.futures import ProcessPoolExecutor
730
726
 
731
- full_ds = _open_dataset(recipe).mutate()
727
+ full_ds = dataset
732
728
  print("Opened full dataset.", flush=True)
733
729
 
734
730
  # Use ProcessPoolExecutor for parallel data extraction.
735
731
  # Workers return (date, subset) tuples.
736
732
  root = zarr.open(zarr_path, mode="a")
737
- initialize_zarr_store(root, full_ds, recipe)
733
+ initialize_zarr_store(root, full_ds)
738
734
  print("Zarr store initialised.", flush=True)
739
735
 
740
736
  existing_dates = np.array(sorted(root["dates"]), dtype="datetime64[s]")
@@ -12,11 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import List
17
- from typing import Set
18
- from typing import Tuple
19
- from typing import Union
20
15
 
21
16
  import numpy as np
22
17
  from numpy.typing import NDArray
@@ -49,7 +44,7 @@ class MissingDates(Forwards):
49
44
  List of missing dates.
50
45
  """
51
46
 
52
- def __init__(self, dataset: Dataset, missing_dates: List[Union[int, str]]) -> None:
47
+ def __init__(self, dataset: Dataset, missing_dates: list[int | str]) -> None:
53
48
  """Initializes the MissingDates class.
54
49
 
55
50
  Parameters
@@ -80,13 +75,13 @@ class MissingDates(Forwards):
80
75
  self.missing_dates.append(date)
81
76
 
82
77
  n = self.forward._len
83
- self._missing = set(i for i in self._missing if 0 <= i < n)
78
+ self._missing = {i for i in self._missing if 0 <= i < n}
84
79
  self.missing_dates = sorted(to_datetime(x) for x in self.missing_dates)
85
80
 
86
81
  assert len(self._missing), "No dates to force missing"
87
82
 
88
83
  @cached_property
89
- def missing(self) -> Set[int]:
84
+ def missing(self) -> set[int]:
90
85
  """Returns the set of missing indices."""
91
86
  return self._missing.union(self.forward.missing)
92
87
 
@@ -148,7 +143,7 @@ class MissingDates(Forwards):
148
143
  raise MissingDateError(f"Date {self.forward.dates[n]} is missing (index={n})")
149
144
 
150
145
  @property
151
- def reason(self) -> Dict[str, Any]:
146
+ def reason(self) -> dict[str, Any]:
152
147
  """Provides the reason for missing dates."""
153
148
  return {"missing_dates": self.missing_dates}
154
149
 
@@ -162,7 +157,7 @@ class MissingDates(Forwards):
162
157
  """
163
158
  return Node(self, [self.forward.tree()], **self.reason)
164
159
 
165
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
160
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
166
161
  """Provides metadata specific to the subclass.
167
162
 
168
163
  Returns
@@ -184,7 +179,7 @@ class SkipMissingDates(Forwards):
184
179
  The expected access pattern.
185
180
  """
186
181
 
187
- def __init__(self, dataset: Dataset, expected_access: Union[int, slice]) -> None:
182
+ def __init__(self, dataset: Dataset, expected_access: int | slice) -> None:
188
183
  """Initializes the SkipMissingDates class.
189
184
 
190
185
  Parameters
@@ -285,7 +280,7 @@ class SkipMissingDates(Forwards):
285
280
  return tuple(np.stack(_) for _ in result)
286
281
 
287
282
  @debug_indexing
288
- def _get_slice(self, s: slice) -> Tuple[NDArray[Any], ...]:
283
+ def _get_slice(self, s: slice) -> tuple[NDArray[Any], ...]:
289
284
  """Retrieves a slice of items.
290
285
 
291
286
  Parameters
@@ -303,7 +298,7 @@ class SkipMissingDates(Forwards):
303
298
  return tuple(np.stack(_) for _ in result)
304
299
 
305
300
  @debug_indexing
306
- def __getitem__(self, n: FullIndex) -> Tuple[NDArray[Any], ...]:
301
+ def __getitem__(self, n: FullIndex) -> tuple[NDArray[Any], ...]:
307
302
  """Retrieves the item at the given index.
308
303
 
309
304
  Parameters
@@ -339,7 +334,7 @@ class SkipMissingDates(Forwards):
339
334
  """
340
335
  return Node(self, [self.forward.tree()], expected_access=self.expected_access)
341
336
 
342
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
337
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
343
338
  """Provides metadata specific to the subclass.
344
339
 
345
340
  Returns
@@ -404,7 +399,7 @@ class MissingDataset(Forwards):
404
399
  return self._dates
405
400
 
406
401
  @property
407
- def missing(self) -> Set[int]:
402
+ def missing(self) -> set[int]:
408
403
  """Returns the set of missing indices."""
409
404
  return self._missing
410
405
 
@@ -436,7 +431,7 @@ class MissingDataset(Forwards):
436
431
  """
437
432
  return Node(self, [self.forward.tree()], start=self.start, end=self.end)
438
433
 
439
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
434
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
440
435
  """Provides metadata specific to the subclass.
441
436
 
442
437
  Returns
@@ -10,8 +10,6 @@ import logging
10
10
  import os
11
11
  from functools import cached_property
12
12
  from typing import Any
13
- from typing import Dict
14
- from typing import Tuple
15
13
 
16
14
  import numpy as np
17
15
  from anemoi.utils.dates import frequency_to_timedelta
@@ -82,10 +80,8 @@ class ObservationsBase(Dataset):
82
80
  # return [self.getitem(j) for j in i]
83
81
 
84
82
  raise ValueError(
85
- (
86
- f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
87
- "observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
88
- )
83
+ f"Expected int, got {i} of type {type(i)}. Only int is supported to index "
84
+ "observations datasets. Please use a second [] to select part of the data [i][a,b,c]"
89
85
  )
90
86
 
91
87
  @property
@@ -195,13 +191,11 @@ class ObservationsZarr(ObservationsBase):
195
191
 
196
192
  if len(self.forward) != len(self.dates):
197
193
  raise ValueError(
198
- (
199
- f"Dates are not consistent with the number of items in the dataset. "
200
- f"The dataset contains {len(self.forward)} time windows. "
201
- f"This is not compatible with the "
202
- f"{len(self.dates)} requested dates with frequency={frequency_hours}"
203
- f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
204
- )
194
+ f"Dates are not consistent with the number of items in the dataset. "
195
+ f"The dataset contains {len(self.forward)} time windows. "
196
+ f"This is not compatible with the "
197
+ f"{len(self.dates)} requested dates with frequency={frequency_hours}"
198
+ f"{self.dates[0]}, {self.dates[1]}, ..., {self.dates[-2]}, {self.dates[-1]} "
205
199
  )
206
200
 
207
201
  @property
@@ -307,7 +301,7 @@ class ObservationsZarr(ObservationsBase):
307
301
  return f"Observations({os.path.basename(self.path)}, {self.dates[0]};{self.dates[-1]}, {len(self)})"
308
302
 
309
303
 
310
- def observations_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> ObservationsBase:
304
+ def observations_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> ObservationsBase:
311
305
  observations = kwargs.pop("observations")
312
306
 
313
307
  if not isinstance(observations, dict):
@@ -12,8 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import Set
17
15
 
18
16
  import numpy as np
19
17
  from anemoi.utils.dates import frequency_to_timedelta
@@ -38,7 +36,7 @@ class Padded(Forwards):
38
36
  _after: int = 0
39
37
  _inside: int = 0
40
38
 
41
- def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: Dict[str, Any]) -> None:
39
+ def __init__(self, dataset: Dataset, start: str, end: str, frequency: str, reason: dict[str, Any]) -> None:
42
40
  """Create a padded subset of a dataset.
43
41
 
44
42
  Attributes:
@@ -195,7 +193,7 @@ class Padded(Forwards):
195
193
  return (len(self.dates),) + self.dataset.shape[1:]
196
194
 
197
195
  @cached_property
198
- def missing(self) -> Set[int]:
196
+ def missing(self) -> set[int]:
199
197
  raise NotImplementedError("Need to decide whether to include the added dates as missing or not")
200
198
  # return self.forward.missing
201
199
 
@@ -207,7 +205,7 @@ class Padded(Forwards):
207
205
  """
208
206
  return Node(self, [self.dataset.tree()], **self.reason)
209
207
 
210
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
208
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
211
209
  """Get the metadata specific to the forwards subclass.
212
210
 
213
211
  Returns:
@@ -35,7 +35,7 @@ class Npz1Backend(Backend):
35
35
  return dict(np.load(f))
36
36
 
37
37
  def read_metadata(self):
38
- with open(os.path.join(self.path, "metadata.json"), "r") as f:
38
+ with open(os.path.join(self.path, "metadata.json")) as f:
39
39
  return json.load(f)
40
40
 
41
41
  def read_statistics(self):
@@ -56,7 +56,7 @@ class Npz2Backend(Backend):
56
56
  return dict(np.load(f))
57
57
 
58
58
  def read_metadata(self):
59
- with open(os.path.join(self.path, "metadata.json"), "r") as f:
59
+ with open(os.path.join(self.path, "metadata.json")) as f:
60
60
  return json.load(f)
61
61
 
62
62
  def read_statistics(self):
@@ -12,11 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import List
17
- from typing import Optional
18
- from typing import Tuple
19
- from typing import Union
20
15
 
21
16
  import numpy as np
22
17
  from numpy.typing import NDArray
@@ -35,9 +30,7 @@ from .indexing import update_tuple
35
30
  LOG = logging.getLogger(__name__)
36
31
 
37
32
 
38
- def make_rescale(
39
- variable: str, rescale: Union[Tuple[float, float], List[str], Dict[str, float]]
40
- ) -> Tuple[float, float]:
33
+ def make_rescale(variable: str, rescale: tuple[float, float] | list[str] | dict[str, float]) -> tuple[float, float]:
41
34
  """Create rescale parameters (scale and offset) based on the input rescale specification.
42
35
 
43
36
  Parameters
@@ -86,7 +79,7 @@ class Rescale(Forwards):
86
79
  """A class to apply rescaling to dataset variables."""
87
80
 
88
81
  def __init__(
89
- self, dataset: Dataset, rescale: Dict[str, Union[Tuple[float, float], List[str], Dict[str, float]]]
82
+ self, dataset: Dataset, rescale: dict[str, tuple[float, float] | list[str] | dict[str, float]]
90
83
  ) -> None:
91
84
  """Initialize the Rescale object.
92
85
 
@@ -129,7 +122,7 @@ class Rescale(Forwards):
129
122
  """
130
123
  return Node(self, [self.forward.tree()], rescale=self.rescale)
131
124
 
132
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
125
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
133
126
  """Get the metadata specific to the rescale subclass.
134
127
 
135
128
  Returns
@@ -204,7 +197,7 @@ class Rescale(Forwards):
204
197
  return data * self._a[0] + self._b[0]
205
198
 
206
199
  @cached_property
207
- def statistics(self) -> Dict[str, NDArray[Any]]:
200
+ def statistics(self) -> dict[str, NDArray[Any]]:
208
201
  """Get the statistics of the rescaled data."""
209
202
  result = {}
210
203
  a = self._a.squeeze()
@@ -224,7 +217,7 @@ class Rescale(Forwards):
224
217
 
225
218
  return result
226
219
 
227
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
220
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
228
221
  """Get the tendencies of the statistics of the rescaled data.
229
222
 
230
223
  Parameters
@@ -12,9 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import List
17
- from typing import Optional
18
15
 
19
16
  from numpy.typing import NDArray
20
17
 
@@ -37,7 +34,7 @@ LOG = logging.getLogger(__name__)
37
34
  class Select(Forwards):
38
35
  """Class to select a subset of variables from a dataset."""
39
36
 
40
- def __init__(self, dataset: Dataset, indices: List[int], reason: Dict[str, Any]) -> None:
37
+ def __init__(self, dataset: Dataset, indices: list[int], reason: dict[str, Any]) -> None:
41
38
  """Initialize the Select class.
42
39
 
43
40
  Parameters
@@ -140,26 +137,26 @@ class Select(Forwards):
140
137
  return (len(self), len(self.indices)) + self.dataset.shape[2:]
141
138
 
142
139
  @cached_property
143
- def variables(self) -> List[str]:
140
+ def variables(self) -> list[str]:
144
141
  """Get the variables of the dataset."""
145
142
  return [self.dataset.variables[i] for i in self.indices]
146
143
 
147
144
  @cached_property
148
- def variables_metadata(self) -> Dict[str, Any]:
145
+ def variables_metadata(self) -> dict[str, Any]:
149
146
  """Get the metadata of the variables."""
150
147
  return {k: v for k, v in self.dataset.variables_metadata.items() if k in self.variables}
151
148
 
152
149
  @cached_property
153
- def name_to_index(self) -> Dict[str, int]:
150
+ def name_to_index(self) -> dict[str, int]:
154
151
  """Get the mapping of variable names to indices."""
155
152
  return {k: i for i, k in enumerate(self.variables)}
156
153
 
157
154
  @cached_property
158
- def statistics(self) -> Dict[str, NDArray[Any]]:
155
+ def statistics(self) -> dict[str, NDArray[Any]]:
159
156
  """Get the statistics of the dataset."""
160
157
  return {k: v[self.indices] for k, v in self.dataset.statistics.items()}
161
158
 
162
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
159
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
163
160
  """Get the statistical tendencies of the dataset.
164
161
 
165
162
  Parameters
@@ -176,7 +173,7 @@ class Select(Forwards):
176
173
  delta = self.frequency
177
174
  return {k: v[self.indices] for k, v in self.dataset.statistics_tendencies(delta).items()}
178
175
 
179
- def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
176
+ def metadata_specific(self, **kwargs: Any) -> dict[str, Any]:
180
177
  """Get the specific metadata of the dataset.
181
178
 
182
179
  Parameters
@@ -216,7 +213,7 @@ class Select(Forwards):
216
213
  """
217
214
  return Node(self, [self.dataset.tree()], **self.reason)
218
215
 
219
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
216
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
220
217
  """Get the metadata specific to the subclass.
221
218
 
222
219
  Returns
@@ -231,7 +228,7 @@ class Select(Forwards):
231
228
  class Rename(Forwards):
232
229
  """Class to rename variables in a dataset."""
233
230
 
234
- def __init__(self, dataset: Dataset, rename: Dict[str, str]) -> None:
231
+ def __init__(self, dataset: Dataset, rename: dict[str, str]) -> None:
235
232
  """Initialize the Rename class.
236
233
 
237
234
  Parameters
@@ -251,17 +248,17 @@ class Rename(Forwards):
251
248
  self.rename = rename
252
249
 
253
250
  @property
254
- def variables(self) -> List[str]:
251
+ def variables(self) -> list[str]:
255
252
  """Get the renamed variables."""
256
253
  return self._variables
257
254
 
258
255
  @property
259
- def variables_metadata(self) -> Dict[str, Any]:
256
+ def variables_metadata(self) -> dict[str, Any]:
260
257
  """Get the renamed variables metadata."""
261
258
  return self._variables_metadata
262
259
 
263
260
  @cached_property
264
- def name_to_index(self) -> Dict[str, int]:
261
+ def name_to_index(self) -> dict[str, int]:
265
262
  """Get the mapping of renamed variable names to indices."""
266
263
  return {k: i for i, k in enumerate(self.variables)}
267
264
 
@@ -273,7 +270,7 @@ class Rename(Forwards):
273
270
  """
274
271
  return Node(self, [self.forward.tree()], rename=self.rename)
275
272
 
276
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
273
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
277
274
  """Get the metadata specific to the subclass.
278
275
 
279
276
  Returns:
@@ -12,9 +12,6 @@ import datetime
12
12
  import logging
13
13
  from functools import cached_property
14
14
  from typing import Any
15
- from typing import Dict
16
- from typing import Optional
17
- from typing import Set
18
15
 
19
16
  from numpy.typing import NDArray
20
17
 
@@ -56,11 +53,11 @@ class Statistics(Forwards):
56
53
  )
57
54
 
58
55
  @cached_property
59
- def statistics(self) -> Dict[str, NDArray[Any]]:
56
+ def statistics(self) -> dict[str, NDArray[Any]]:
60
57
  """Get the statistics."""
61
58
  return self._statistic.statistics
62
59
 
63
- def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
60
+ def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
64
61
  """Get the statistics tendencies.
65
62
 
66
63
  Parameters
@@ -77,7 +74,7 @@ class Statistics(Forwards):
77
74
  delta = self.frequency
78
75
  return self._statistic.statistics_tendencies(delta)
79
76
 
80
- def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
77
+ def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
81
78
  """Get the metadata specific to the forwards subclass.
82
79
 
83
80
  Returns
@@ -97,7 +94,7 @@ class Statistics(Forwards):
97
94
  """
98
95
  return Node(self, [self.forward.tree()])
99
96
 
100
- def get_dataset_names(self, names: Set[str]) -> None:
97
+ def get_dataset_names(self, names: set[str]) -> None:
101
98
  """Get the dataset names.
102
99
 
103
100
  Parameters