anemoi-datasets 0.5.25__py3-none-any.whl → 0.5.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/grib-index.py +1 -1
  7. anemoi/datasets/commands/inspect.py +27 -35
  8. anemoi/datasets/commands/validate.py +59 -0
  9. anemoi/datasets/compute/recentre.py +3 -6
  10. anemoi/datasets/create/__init__.py +22 -25
  11. anemoi/datasets/create/check.py +10 -12
  12. anemoi/datasets/create/chunks.py +1 -2
  13. anemoi/datasets/create/config.py +3 -6
  14. anemoi/datasets/create/filter.py +21 -24
  15. anemoi/datasets/create/input/__init__.py +1 -2
  16. anemoi/datasets/create/input/action.py +3 -5
  17. anemoi/datasets/create/input/concat.py +5 -8
  18. anemoi/datasets/create/input/context.py +3 -6
  19. anemoi/datasets/create/input/data_sources.py +5 -8
  20. anemoi/datasets/create/input/empty.py +1 -2
  21. anemoi/datasets/create/input/filter.py +2 -3
  22. anemoi/datasets/create/input/function.py +1 -2
  23. anemoi/datasets/create/input/join.py +4 -5
  24. anemoi/datasets/create/input/misc.py +4 -6
  25. anemoi/datasets/create/input/repeated_dates.py +13 -18
  26. anemoi/datasets/create/input/result.py +29 -33
  27. anemoi/datasets/create/input/step.py +6 -24
  28. anemoi/datasets/create/input/template.py +3 -4
  29. anemoi/datasets/create/input/trace.py +1 -1
  30. anemoi/datasets/create/patch.py +1 -2
  31. anemoi/datasets/create/persistent.py +3 -5
  32. anemoi/datasets/create/size.py +1 -3
  33. anemoi/datasets/create/sources/accumulations.py +47 -52
  34. anemoi/datasets/create/sources/accumulations2.py +4 -8
  35. anemoi/datasets/create/sources/constants.py +1 -3
  36. anemoi/datasets/create/sources/empty.py +1 -2
  37. anemoi/datasets/create/sources/fdb.py +133 -0
  38. anemoi/datasets/create/sources/forcings.py +1 -2
  39. anemoi/datasets/create/sources/grib.py +6 -10
  40. anemoi/datasets/create/sources/grib_index.py +13 -15
  41. anemoi/datasets/create/sources/hindcasts.py +2 -5
  42. anemoi/datasets/create/sources/legacy.py +1 -1
  43. anemoi/datasets/create/sources/mars.py +17 -21
  44. anemoi/datasets/create/sources/netcdf.py +1 -2
  45. anemoi/datasets/create/sources/opendap.py +1 -3
  46. anemoi/datasets/create/sources/patterns.py +4 -6
  47. anemoi/datasets/create/sources/planetary_computer.py +44 -0
  48. anemoi/datasets/create/sources/recentre.py +8 -11
  49. anemoi/datasets/create/sources/source.py +3 -6
  50. anemoi/datasets/create/sources/tendencies.py +2 -5
  51. anemoi/datasets/create/sources/xarray.py +4 -6
  52. anemoi/datasets/create/sources/xarray_support/__init__.py +15 -32
  53. anemoi/datasets/create/sources/xarray_support/coordinates.py +16 -12
  54. anemoi/datasets/create/sources/xarray_support/field.py +17 -16
  55. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  56. anemoi/datasets/create/sources/xarray_support/flavour.py +83 -45
  57. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  58. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  59. anemoi/datasets/create/sources/xarray_support/patch.py +47 -6
  60. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  61. anemoi/datasets/create/sources/xarray_support/variable.py +27 -23
  62. anemoi/datasets/create/sources/xarray_zarr.py +1 -2
  63. anemoi/datasets/create/sources/zenodo.py +3 -5
  64. anemoi/datasets/create/statistics/__init__.py +3 -6
  65. anemoi/datasets/create/testing.py +2 -74
  66. anemoi/datasets/create/typing.py +1 -2
  67. anemoi/datasets/create/utils.py +1 -2
  68. anemoi/datasets/create/zarr.py +7 -2
  69. anemoi/datasets/data/__init__.py +15 -6
  70. anemoi/datasets/data/complement.py +52 -23
  71. anemoi/datasets/data/concat.py +5 -8
  72. anemoi/datasets/data/dataset.py +42 -47
  73. anemoi/datasets/data/debug.py +7 -9
  74. anemoi/datasets/data/ensemble.py +4 -6
  75. anemoi/datasets/data/fill_missing.py +7 -10
  76. anemoi/datasets/data/forwards.py +30 -28
  77. anemoi/datasets/data/grids.py +12 -16
  78. anemoi/datasets/data/indexing.py +9 -12
  79. anemoi/datasets/data/interpolate.py +7 -15
  80. anemoi/datasets/data/join.py +8 -12
  81. anemoi/datasets/data/masked.py +6 -11
  82. anemoi/datasets/data/merge.py +5 -9
  83. anemoi/datasets/data/misc.py +41 -45
  84. anemoi/datasets/data/missing.py +11 -16
  85. anemoi/datasets/data/observations/__init__.py +8 -14
  86. anemoi/datasets/data/padded.py +3 -5
  87. anemoi/datasets/data/records/backends/__init__.py +2 -2
  88. anemoi/datasets/data/rescale.py +5 -12
  89. anemoi/datasets/data/select.py +13 -16
  90. anemoi/datasets/data/statistics.py +4 -7
  91. anemoi/datasets/data/stores.py +23 -77
  92. anemoi/datasets/data/subset.py +8 -11
  93. anemoi/datasets/data/unchecked.py +7 -11
  94. anemoi/datasets/data/xy.py +25 -21
  95. anemoi/datasets/dates/__init__.py +13 -18
  96. anemoi/datasets/dates/groups.py +7 -10
  97. anemoi/datasets/grids.py +11 -12
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/METADATA +5 -4
  101. anemoi_datasets-0.5.27.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filters/__init__.py +0 -33
  103. anemoi/datasets/create/filters/empty.py +0 -37
  104. anemoi/datasets/create/filters/legacy.py +0 -93
  105. anemoi/datasets/create/filters/noop.py +0 -37
  106. anemoi/datasets/create/filters/orog_to_z.py +0 -58
  107. anemoi/datasets/create/filters/pressure_level_relative_humidity_to_specific_humidity.py +0 -83
  108. anemoi/datasets/create/filters/pressure_level_specific_humidity_to_relative_humidity.py +0 -84
  109. anemoi/datasets/create/filters/rename.py +0 -205
  110. anemoi/datasets/create/filters/rotate_winds.py +0 -105
  111. anemoi/datasets/create/filters/single_level_dewpoint_to_relative_humidity.py +0 -78
  112. anemoi/datasets/create/filters/single_level_relative_humidity_to_dewpoint.py +0 -84
  113. anemoi/datasets/create/filters/single_level_relative_humidity_to_specific_humidity.py +0 -163
  114. anemoi/datasets/create/filters/single_level_specific_humidity_to_relative_humidity.py +0 -451
  115. anemoi/datasets/create/filters/speeddir_to_uv.py +0 -95
  116. anemoi/datasets/create/filters/sum.py +0 -68
  117. anemoi/datasets/create/filters/transform.py +0 -51
  118. anemoi/datasets/create/filters/unrotate_winds.py +0 -105
  119. anemoi/datasets/create/filters/uv_to_speeddir.py +0 -94
  120. anemoi/datasets/create/filters/wz_to_w.py +0 -98
  121. anemoi/datasets/utils/__init__.py +0 -8
  122. anemoi_datasets-0.5.25.dist-info/RECORD +0 -150
  123. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/WHEEL +0 -0
  124. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/entry_points.txt +0 -0
  125. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/licenses/LICENSE +0 -0
  126. {anemoi_datasets-0.5.25.dist-info → anemoi_datasets-0.5.27.dist-info}/top_level.txt +0 -0
anemoi/datasets/__init__.py
@@ -7,7 +7,6 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
-from typing import List
 
 from .data import MissingDateError
 from .data import add_dataset_path
@@ -23,7 +22,7 @@ except ImportError: # pragma: no cover
     # Local copy or not installed with setuptools
     __version__ = "999"
 
-__all__: List[str] = [
+__all__: list[str] = [
    "add_dataset_path",
    "add_named_dataset",
    "list_dataset_names",
anemoi/datasets/_version.py
@@ -1,7 +1,14 @@
 # file generated by setuptools-scm
 # don't change, don't track in version control
 
-__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
 
 TYPE_CHECKING = False
 if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
     from typing import Union
 
     VERSION_TUPLE = Tuple[Union[int, str], ...]
+    COMMIT_ID = Union[str, None]
 else:
     VERSION_TUPLE = object
+    COMMIT_ID = object
 
 version: str
 __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
+commit_id: COMMIT_ID
+__commit_id__: COMMIT_ID
 
-__version__ = version = '0.5.25'
-__version_tuple__ = version_tuple = (0, 5, 25)
+__version__ = version = '0.5.27'
+__version_tuple__ = version_tuple = (0, 5, 27)
+
+__commit_id__ = commit_id = None
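
Note: newer setuptools-scm versions also write commit-id attributes into the generated _version.py. A minimal sketch of what an installed 0.5.27 wheel exposes (values taken from the diff above; commit_id is None because this wheel was built without one):

    from anemoi.datasets import _version

    print(_version.version)        # "0.5.27"
    print(_version.version_tuple)  # (0, 5, 27)
    print(_version.commit_id)      # None in this wheel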
anemoi/datasets/commands/check.py
@@ -77,7 +77,7 @@ class Check(Command):
 
         recipe_filename = os.path.basename(recipe)
         recipe_name = os.path.splitext(recipe_filename)[0]
-        in_recipe_name = yaml.safe_load(open(recipe, "r", encoding="utf-8"))["name"]
+        in_recipe_name = yaml.safe_load(open(recipe, encoding="utf-8"))["name"]
         if recipe_name != in_recipe_name:
             print(f"Recipe name {recipe_name} does not match the name in the recipe file {in_recipe_name}")
 
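
This is a behaviour-preserving cleanup: "r" is the default mode of the builtin open(), so the two calls are equivalent (the file name below is illustrative):

    f = open("recipe.yaml", "r", encoding="utf-8")  # 0.5.25
    f = open("recipe.yaml", encoding="utf-8")       # 0.5.27, identical behaviour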
anemoi/datasets/commands/copy.py
@@ -14,7 +14,6 @@ import sys
 from concurrent.futures import ThreadPoolExecutor
 from concurrent.futures import as_completed
 from typing import Any
-from typing import Optional
 
 import tqdm
 from anemoi.utils.remote import Transfer
@@ -136,7 +135,7 @@ class ZarrCopier:
             return zarr.storage.NestedDirectoryStore(path)
         return path
 
-    def copy_chunk(self, n: int, m: int, source: Any, target: Any, _copy: Any, verbosity: int) -> Optional[slice]:
+    def copy_chunk(self, n: int, m: int, source: Any, target: Any, _copy: Any, verbosity: int) -> slice | None:
        """Copy a chunk of data from source to target.
 
        Parameters
anemoi/datasets/commands/create.py
@@ -120,7 +120,7 @@ class Create(Command):
         task("finalise", options)
 
         task("init_additions", options)
-        task("run_additions", options)
+        task("load_additions", options)
         task("finalise_additions", options)
 
         task("patch", options)
anemoi/datasets/commands/grib-index.py
@@ -81,7 +81,7 @@ class GribIndexCmd(Command):
             bool
                 True if the path matches, False otherwise.
            """
-            return fnmatch.fnmatch(path, args.match)
+            return fnmatch.fnmatch(os.path.basename(path), args.match)
 
        from anemoi.datasets.create.sources.grib_index import GribIndex
 
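
Unlike most changes in this release, this one alters behaviour: the glob pattern is now matched against the file name only, not the full path, so patterns without directory components match files inside subdirectories. A small self-contained sketch (the path is illustrative):

    import fnmatch
    import os

    path = "/data/grib/2024/era5-01.grib"

    fnmatch.fnmatch(path, "era5-*.grib")                    # False: the pattern would have to match the whole path
    fnmatch.fnmatch(os.path.basename(path), "era5-*.grib")  # True: only the file name is matched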
anemoi/datasets/commands/inspect.py
@@ -14,10 +14,6 @@ import os
 from copy import deepcopy
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Union
 
 import numpy as np
 import semantic_version
@@ -39,7 +35,7 @@ from . import Command
 LOG = logging.getLogger(__name__)
 
 
-def compute_directory_size(path: str) -> Union[tuple[int, int], tuple[None, None]]:
+def compute_directory_size(path: str) -> tuple[int, int] | tuple[None, None]:
    """Compute the total size and number of files in a directory.
 
    Parameters
@@ -104,7 +100,7 @@ def cos_local_time_bug(lon: float, date: datetime.datetime) -> float:
     return np.cos(radians)
 
 
-def find(config: Union[dict, list], name: str) -> Any:
+def find(config: dict | list, name: str) -> Any:
    """Recursively search for a key in a nested dictionary or list.
 
    Parameters
@@ -167,7 +163,7 @@ class Version:
         print(f"🔢 Format version: {self.version}")
 
     @property
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
        """Get a mapping of variable names to their indices."""
        return find(self.metadata, "name_to_index")
 
@@ -208,30 +204,30 @@ class Version:
         return self.metadata["resolution"]
 
     @property
-    def field_shape(self) -> Optional[tuple]:
+    def field_shape(self) -> tuple | None:
        """Get the field shape of the dataset."""
        return self.metadata.get("field_shape")
 
     @property
-    def proj_string(self) -> Optional[str]:
+    def proj_string(self) -> str | None:
        """Get the projection string of the dataset."""
        return self.metadata.get("proj_string")
 
     @property
-    def shape(self) -> Optional[tuple]:
+    def shape(self) -> tuple | None:
        """Get the shape of the dataset."""
        if self.data and hasattr(self.data, "shape"):
            return self.data.shape
 
     @property
-    def n_missing_dates(self) -> Optional[int]:
+    def n_missing_dates(self) -> int | None:
        """Get the number of missing dates in the dataset."""
        if "missing_dates" in self.metadata:
            return len(self.metadata["missing_dates"])
        return None
 
     @property
-    def uncompressed_data_size(self) -> Optional[int]:
+    def uncompressed_data_size(self) -> int | None:
        """Get the uncompressed data size of the dataset."""
        if self.data and hasattr(self.data, "dtype") and hasattr(self.data, "size"):
            return self.data.dtype.itemsize * self.data.size
@@ -258,7 +254,7 @@ class Version:
         print()
         shape_str = "📐 Shape : "
         if self.shape:
-            shape_str += " × ".join(["{:,}".format(s) for s in self.shape])
+            shape_str += " × ".join([f"{s:,}" for s in self.shape])
         if self.uncompressed_data_size:
             shape_str += f" ({bytes(self.uncompressed_data_size)})"
         print(shape_str)
@@ -293,17 +289,17 @@ class Version:
         print()
 
     @property
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
        """Get the list of variables in the dataset."""
        return [v[0] for v in sorted(self.name_to_index.items(), key=lambda x: x[1])]
 
     @property
-    def total_size(self) -> Optional[int]:
+    def total_size(self) -> int | None:
        """Get the total size of the dataset."""
        return self.zarr.attrs.get("total_size")
 
     @property
-    def total_number_of_files(self) -> Optional[int]:
+    def total_number_of_files(self) -> int | None:
        """Get the total number of files in the dataset."""
        return self.zarr.attrs.get("total_number_of_files")
 
@@ -348,7 +344,7 @@ class Version:
         return False
 
     @property
-    def statistics_started(self) -> Optional[datetime.datetime]:
+    def statistics_started(self) -> datetime.datetime | None:
        """Get the timestamp when statistics computation started."""
        for d in reversed(self.metadata.get("history", [])):
            if d["action"] == "compute_statistics_start":
@@ -356,12 +352,12 @@
         return None
 
     @property
-    def build_flags(self) -> Optional[NDArray[Any]]:
+    def build_flags(self) -> NDArray[Any] | None:
        """Get the build flags of the dataset."""
        return self.zarr.get("_build_flags")
 
     @cached_property
-    def copy_flags(self) -> Optional[NDArray[Any]]:
+    def copy_flags(self) -> NDArray[Any] | None:
        """Get the copy flags of the dataset."""
        if "_copy" not in self.zarr:
            return None
@@ -381,7 +377,7 @@
         return not all(self.copy_flags)
 
     @property
-    def build_lengths(self) -> Optional[NDArray]:
+    def build_lengths(self) -> NDArray | None:
        """Get the build lengths of the dataset."""
        return self.zarr.get("_build_lengths")
 
@@ -396,17 +392,13 @@
             print(
                 "📈 Progress:",
                 progress(built, total, width=50),
-                "{:.0f}%".format(built / total * 100),
+                f"{built / total * 100:.0f}%",
             )
             return
 
-        if self.build_flags is None:
-            print("🪫 Dataset not initialised")
-            return
-
-        build_flags = self.build_flags
+        build_flags = self.build_flags or np.array([], dtype=bool)
 
-        build_lengths = self.build_lengths
+        build_lengths = self.build_lengths or np.array([], dtype=bool)
         assert build_flags.size == build_lengths.size
 
         latest_write_timestamp = self.zarr.attrs.get("latest_write_timestamp")
@@ -422,7 +414,7 @@
         print(
             "📈 Progress:",
             progress(built, total, width=50),
-            "{:.0f}%".format(built / total * 100),
+            f"{built / total * 100:.0f}%",
         )
         start = self.initialised
         if self.initialised:
@@ -623,7 +615,7 @@ class Version0_6(Version):
    """Represents version 0.6 of a dataset."""
 
     @property
-    def initialised(self) -> Optional[datetime.datetime]:
+    def initialised(self) -> datetime.datetime | None:
        """Get the initialization timestamp of the dataset."""
        for record in self.metadata.get("history", []):
            if record["action"] == "initialised":
@@ -659,12 +651,12 @@
         return all(build_flags)
 
     @property
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
        """Get a mapping of variable names to their indices."""
        return {n: i for i, n in enumerate(self.metadata["variables"])}
 
     @property
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
        """Get the list of variables in the dataset."""
        return self.metadata["variables"]
 
@@ -706,7 +698,7 @@ class Version0_13(Version0_12):
    """Represents version 0.13 of a dataset."""
 
     @property
-    def build_flags(self) -> Optional[NDArray]:
+    def build_flags(self) -> NDArray | None:
        """Get the build flags for the dataset."""
        if "_build" not in self.zarr:
            return None
@@ -714,7 +706,7 @@
         return build.get("flags")
 
     @property
-    def build_lengths(self) -> Optional[NDArray]:
+    def build_lengths(self) -> NDArray | None:
        """Get the build lengths for the dataset."""
        if "_build" not in self.zarr:
            return None
@@ -792,10 +784,10 @@ class InspectZarr(Command):
 
         try:
             if progress:
-                return version.progress()
+                version.progress()
 
             if statistics:
-                return version.brute_force_statistics()
+                version.brute_force_statistics()
 
             version.info(detailed, size)
 
anemoi/datasets/commands/validate.py (new file)
@@ -0,0 +1,59 @@
+# (C) Copyright 2025 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+import importlib
+import logging
+from typing import Any
+
+from anemoi.datasets.validate import validate_dataset
+
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+DEFAULT_DATASET = "aifs-ea-an-oper-0001-mars-o96-1979-2023-6h-v8"
+
+
+class Validate(Command):
+    """Command to validate an anemoi dataset."""
+
+    def add_arguments(self, command_parser: Any) -> None:
+        """Add arguments to the command parser.
+
+        Parameters
+        ----------
+        command_parser : Any
+            The command parser.
+        """
+
+        command_parser.add_argument("--callable", metavar="DATASET", default="anemoi.datasets.open_dataset")
+        command_parser.add_argument("--costly-checks", action="store_true", help="Run costly checks")
+        command_parser.add_argument("--detailed", action="store_true", help="Give detailed report")
+        command_parser.add_argument("path", metavar="DATASET")
+
+    def run(self, args: Any) -> None:
+        """Run the command.
+
+        Parameters
+        ----------
+        args : Any
+            The command arguments.
+        """
+
+        module_path, func_name = args.callable.rsplit(".", 1)
+        module = importlib.import_module(module_path)
+        callable_func = getattr(module, func_name)
+
+        if args.path == "default":
+            args.path = DEFAULT_DATASET
+
+        dataset = callable_func(args.path)
+        validate_dataset(dataset, costly_checks=args.costly_checks, detailed=args.detailed)
+
+
+command = Validate
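
The new validate command is a thin wrapper around anemoi.datasets.validate.validate_dataset (also added in this release, see validate.py in the file list above). The same check can be run from Python; the dataset path below is illustrative:

    from anemoi.datasets import open_dataset
    from anemoi.datasets.validate import validate_dataset

    dataset = open_dataset("/path/to/dataset.zarr")  # illustrative path
    validate_dataset(dataset, costly_checks=False, detailed=True)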
anemoi/datasets/compute/recentre.py
@@ -10,9 +10,6 @@
 
 import logging
 from typing import Any
-from typing import Dict
-from typing import Optional
-from typing import Tuple
 
 import numpy as np
 from earthkit.data.core.temporary import temp_file
@@ -36,7 +33,7 @@ SKIP = ("class", "stream", "type", "number", "expver", "_leg_number", "anoffset"
 
 
 def check_compatible(
-    f1: Any, f2: Any, centre_field_as_mars: Dict[str, Any], ensemble_field_as_mars: Dict[str, Any]
+    f1: Any, f2: Any, centre_field_as_mars: dict[str, Any], ensemble_field_as_mars: dict[str, Any]
 ) -> None:
    """Check if two fields are compatible.
 
@@ -75,9 +72,9 @@ def recentre(
     *,
     members: Any,
     centre: Any,
-    clip_variables: Tuple[str, ...] = CLIP_VARIABLES,
+    clip_variables: tuple[str, ...] = CLIP_VARIABLES,
     alpha: float = 1.0,
-    output: Optional[str] = None,
+    output: str | None = None,
 ) -> Any:
    """Recentre ensemble members around the centre field.
 
anemoi/datasets/create/__init__.py
@@ -16,8 +16,6 @@ import uuid
 import warnings
 from functools import cached_property
 from typing import Any
-from typing import Optional
-from typing import Union
 
 import cftime
 import numpy as np
@@ -102,8 +100,8 @@ def json_tidy(o: Any) -> Any:
 
 def build_statistics_dates(
     dates: list[datetime.datetime],
-    start: Optional[datetime.datetime],
-    end: Optional[datetime.datetime],
+    start: datetime.datetime | None,
+    end: datetime.datetime | None,
 ) -> tuple[str, str]:
    """Compute the start and end dates for the statistics.
 
@@ -359,7 +357,7 @@ class Actor: # TODO: rename to Creator
 
     dataset_class = WritableDataset
 
-    def __init__(self, path: str, cache: Optional[str] = None):
+    def __init__(self, path: str, cache: str | None = None):
        """Initialize an Actor instance.
 
        Parameters
@@ -577,10 +575,10 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         check_name: bool = False,
         overwrite: bool = False,
         use_threads: bool = False,
-        statistics_temp_dir: Optional[str] = None,
+        statistics_temp_dir: str | None = None,
         progress: Any = None,
         test: bool = False,
-        cache: Optional[str] = None,
+        cache: str | None = None,
         **kwargs: Any,
     ):
        """Initialize an Init instance.
@@ -809,11 +807,11 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
     def __init__(
         self,
         path: str,
-        parts: Optional[str] = None,
+        parts: str | None = None,
         use_threads: bool = False,
-        statistics_temp_dir: Optional[str] = None,
+        statistics_temp_dir: str | None = None,
         progress: Any = None,
-        cache: Optional[str] = None,
+        cache: str | None = None,
         **kwargs: Any,
     ):
        """Initialize a Load instance.
@@ -907,8 +905,8 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         print("Requested dates", compress_dates(dates))
         print("Cube dates", compress_dates(dates_in_data))
 
-        a = set(as_datetime(_) for _ in dates)
-        b = set(as_datetime(_) for _ in dates_in_data)
+        a = {as_datetime(_) for _ in dates}
+        b = {as_datetime(_) for _ in dates_in_data}
 
         print("Missing dates", compress_dates(a - b))
         print("Extra dates", compress_dates(b - a))
@@ -958,7 +956,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
             array.flush()
             LOG.info("Flushed data array")
 
-    def _get_allow_nans(self) -> Union[bool, list]:
+    def _get_allow_nans(self) -> bool | list:
        """Get the allow_nans configuration.
 
        Returns
@@ -991,7 +989,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
         total = cube.count(reading_chunks)
         LOG.debug(f"Loading datacube: {cube}")
 
-        def position(x: Any) -> Optional[int]:
+        def position(x: Any) -> int | None:
             if isinstance(x, str) and "/" in x:
                 x = x.split("/")
                 return int(x[0])
@@ -1038,7 +1036,7 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
     def __init__(
         self,
         path: str,
-        statistics_temp_dir: Optional[str] = None,
+        statistics_temp_dir: str | None = None,
         delta: list = [],
         use_threads: bool = False,
         **kwargs: Any,
@@ -1217,19 +1215,19 @@ class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         LOG.info(f"Cleaned temporary storage {self.tmp_storage_path}")
 
 
-class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
+class _LoadAdditions(Actor, HasRegistryMixin, AdditionsMixin):
    """A class to run dataset additions."""
 
     def __init__(
         self,
         path: str,
         delta: str,
-        parts: Optional[str] = None,
+        parts: str | None = None,
         use_threads: bool = False,
         progress: Any = None,
         **kwargs: Any,
     ):
-        """Initialize a _RunAdditions instance.
+        """Initialize a _LoadAdditions instance.
 
        Parameters
        ----------
@@ -1469,7 +1467,7 @@ def multi_addition(cls: type) -> type:
 
 
 InitAdditions = multi_addition(_InitAdditions)
-RunAdditions = multi_addition(_RunAdditions)
+LoadAdditions = multi_addition(_LoadAdditions)
 FinaliseAdditions = multi_addition(_FinaliseAdditions)
 
 
@@ -1480,7 +1478,7 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         self,
         path: str,
         use_threads: bool = False,
-        statistics_temp_dir: Optional[str] = None,
+        statistics_temp_dir: str | None = None,
         progress: Any = None,
         **kwargs: Any,
     ):
@@ -1539,7 +1537,7 @@
         LOG.info(f"Wrote statistics in {self.path}")
 
     @cached_property
-    def allow_nans(self) -> Union[bool, list]:
+    def allow_nans(self) -> bool | list:
        """Check if NaNs are allowed."""
        import zarr
 
@@ -1581,7 +1579,7 @@ def chain(tasks: list) -> type:
     return Chain
 
 
-def creator_factory(name: str, trace: Optional[str] = None, **kwargs: Any) -> Any:
+def creator_factory(name: str, trace: str | None = None, **kwargs: Any) -> Any:
    """Create a dataset creator.
 
    Parameters
@@ -1612,10 +1610,9 @@
         cleanup=Cleanup,
         verify=Verify,
         init_additions=InitAdditions,
-        load_additions=RunAdditions,
-        run_additions=RunAdditions,
+        load_additions=LoadAdditions,
         finalise_additions=chain([FinaliseAdditions, Size]),
-        additions=chain([InitAdditions, RunAdditions, FinaliseAdditions, Size, Cleanup]),
+        additions=chain([InitAdditions, LoadAdditions, FinaliseAdditions, Size, Cleanup]),
     )[name]
     LOG.debug(f"Creating {cls.__name__} with {kwargs}")
     return cls(**kwargs)
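
After this change only "load_additions" is a valid task name; the "run_additions" alias is gone from the factory mapping, so any script still using it will now raise a KeyError. A hedged sketch of a direct call (the keyword arguments are assumptions based on the _LoadAdditions signature above):

    from anemoi.datasets.create import creator_factory

    creator = creator_factory(
        "load_additions",
        path="dataset.zarr",  # illustrative target dataset
        delta="12h",          # assumption: a statistics delta accepted by _LoadAdditions
    )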
anemoi/datasets/create/check.py
@@ -12,10 +12,8 @@ import datetime
 import logging
 import re
 import warnings
+from collections.abc import Callable
 from typing import Any
-from typing import Callable
-from typing import Optional
-from typing import Union
 
 import numpy as np
 from anemoi.utils.config import load_config
@@ -31,10 +29,10 @@ class DatasetName:
     def __init__(
         self,
         name: str,
-        resolution: Optional[str] = None,
-        start_date: Optional[datetime.date] = None,
-        end_date: Optional[datetime.date] = None,
-        frequency: Optional[datetime.timedelta] = None,
+        resolution: str | None = None,
+        start_date: datetime.date | None = None,
+        end_date: datetime.date | None = None,
+        frequency: datetime.timedelta | None = None,
     ):
        """Initialize a DatasetName instance.
 
@@ -146,7 +144,7 @@
             "https://anemoi-registry.readthedocs.io/en/latest/naming-conventions.html"
         )
 
-    def check_resolution(self, resolution: Optional[str]) -> None:
+    def check_resolution(self, resolution: str | None) -> None:
        """Check if the resolution matches the expected format.
 
        Parameters
@@ -175,7 +173,7 @@
             if not c.isalnum() and c not in "-":
                 self.messages.append(f"the {self.name} should only contain alphanumeric characters and '-'.")
 
-    def check_frequency(self, frequency: Optional[datetime.timedelta]) -> None:
+    def check_frequency(self, frequency: datetime.timedelta | None) -> None:
        """Check if the frequency matches the expected format.
 
        Parameters
@@ -189,7 +187,7 @@
         self._check_missing("frequency", frequency_str)
         self._check_mismatch("frequency", frequency_str)
 
-    def check_start_date(self, start_date: Optional[datetime.date]) -> None:
+    def check_start_date(self, start_date: datetime.date | None) -> None:
        """Check if the start date matches the expected format.
 
        Parameters
@@ -203,7 +201,7 @@
         self._check_missing("start_date", start_date_str)
         self._check_mismatch("start_date", start_date_str)
 
-    def check_end_date(self, end_date: Optional[datetime.date]) -> None:
+    def check_end_date(self, end_date: datetime.date | None) -> None:
        """Check if the end date matches the expected format.
 
        Parameters
@@ -251,7 +249,7 @@ class StatisticsValueError(ValueError):
 
 
 def check_data_values(
-    arr: NDArray[Any], *, name: str, log: list = [], allow_nans: Union[bool, list, set, tuple, dict] = False
+    arr: NDArray[Any], *, name: str, log: list = [], allow_nans: bool | list | set | tuple | dict = False
 ) -> None:
    """Check the values in the data array for validity.
 
anemoi/datasets/create/chunks.py
@@ -9,7 +9,6 @@
 
 import logging
 import warnings
-from typing import Union
 
 LOG = logging.getLogger(__name__)
 
@@ -27,7 +26,7 @@ class ChunkFilter:
         The chunks that are allowed to be processed.
    """
 
-    def __init__(self, *, parts: Union[str, list], total: int):
+    def __init__(self, *, parts: str | list, total: int):
        """Initializes the ChunkFilter with the given parts and total number of chunks.
 
        Parameters
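
For reference, a hedged sketch of the updated ChunkFilter constructor; the "i/n" parts syntax is an assumption about how anemoi-datasets splits work across workers, not something shown in this diff:

    from anemoi.datasets.create.chunks import ChunkFilter

    # assumption: parts="1/3" selects the first of three groups of the 12 chunks
    chunk_filter = ChunkFilter(parts="1/3", total=12)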