anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
anemoi/datasets/data/subset.py
@@ -10,12 +10,9 @@

 import datetime
 import logging
+from collections.abc import Sequence
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Sequence
-from typing import Set
 from typing import Union

 import numpy as np
@@ -95,7 +92,7 @@ def _end(a: int, b: int, dates: NDArray[np.datetime64]) -> int:
     return b


-def _combine_reasons(reason1: Dict[str, Any], reason2: Dict[str, Any], dates: NDArray[np.datetime64]) -> Dict[str, Any]:
+def _combine_reasons(reason1: dict[str, Any], reason2: dict[str, Any], dates: NDArray[np.datetime64]) -> dict[str, Any]:
     """Combine two reason dictionaries.

     Parameters:
@@ -126,7 +123,7 @@ class Subset(Forwards):
         reason (Dict[str, Any]): Dictionary of reasons.
     """

-    def __init__(self, dataset: Union[Dataset, "Subset"], indices: Sequence[int], reason: Dict[str, Any]) -> None:
+    def __init__(self, dataset: Union[Dataset, "Subset"], indices: Sequence[int], reason: dict[str, Any]) -> None:
         """Initialize the Subset.

         Parameters:
@@ -140,8 +137,8 @@ class Subset(Forwards):
             dataset = dataset.dataset

         self.dataset: Dataset = dataset
-        self.indices: List[int] = list(indices)
-        self.reason: Dict[str, Any] = {k: v for k, v in reason.items() if v is not None}
+        self.indices: list[int] = list(indices)
+        self.reason: dict[str, Any] = {k: v for k, v in reason.items() if v is not None}

         # Forward other properties to the super dataset
         super().__init__(dataset)
@@ -274,10 +271,10 @@ class Subset(Forwards):
         return f"Subset({self.dataset},{self.dates[0]}...{self.dates[-1]}/{self.frequency})"

     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Get the missing indices of the subset."""
         missing = self.dataset.missing
-        result: Set[int] = set()
+        result: set[int] = set()
         for j, i in enumerate(self.indices):
             if i in missing:
                 result.add(j)
@@ -291,7 +288,7 @@ class Subset(Forwards):
         """
         return Node(self, [self.dataset.tree()], **self.reason)

-    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
         """Get the metadata specific to the forwards subclass.

         Returns:
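Note: the hunks above, like most of the data/ and dates/ hunks that follow, apply the same typing modernisation: Dict, List, Set and Tuple from the typing module are replaced by the builtin generics of PEP 585, Optional/Union by the PEP 604 | operator, and ABCs such as Sequence and Callable are now imported from collections.abc. A minimal before/after sketch (the function below is invented for illustration and is not taken from the package):

# Hypothetical example; only the annotation style mirrors the change in these hunks.
from collections.abc import Sequence
from typing import Any


# 0.5.26 style:
#     def combine(reason: Dict[str, Any], indices: List[int]) -> Optional[Set[int]]: ...
# 0.5.28 style:
def combine(reason: dict[str, Any], indices: Sequence[int]) -> set[int] | None:
    """Return the indices whose string form appears in ``reason``, or None if there are none."""
    result = {i for i in indices if str(i) in reason}
    return result or None


print(combine({"1": "missing"}, [0, 1, 2]))  # {1}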
anemoi/datasets/data/unchecked.py
@@ -10,14 +10,10 @@

 import datetime
 import logging
+from collections.abc import Callable
 from functools import cached_property
 from functools import wraps
 from typing import Any
-from typing import Callable
-from typing import Dict
-from typing import List
-from typing import Optional
-from typing import Set

 import numpy as np
 from numpy.typing import NDArray
@@ -163,13 +159,13 @@ class Unchecked(Combined):

     @check("check_same_variables")
     @property
-    def name_to_index(self) -> Dict[str, int]:
+    def name_to_index(self) -> dict[str, int]:
         """Get the mapping of variable names to their indices."""
         raise NotImplementedError()

     @check("check_same_variables")
     @property
-    def variables(self) -> List[str]:
+    def variables(self) -> list[str]:
         """Get the list of variables in the dataset."""
         raise NotImplementedError()

@@ -181,12 +177,12 @@ class Unchecked(Combined):

     @check("check_same_variables")
     @property
-    def statistics(self) -> Dict[str, NDArray[Any]]:
+    def statistics(self) -> dict[str, NDArray[Any]]:
         """Get the statistics of the dataset."""
         raise NotImplementedError()

     @check("check_same_variables")
-    def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+    def statistics_tendencies(self, delta: datetime.timedelta | None = None) -> dict[str, NDArray[Any]]:
         """Get the statistics tendencies of the dataset.

         Parameters
@@ -207,9 +203,9 @@ class Unchecked(Combined):
         raise NotImplementedError()

     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Get the missing data indices."""
-        result: Set[int] = set()
+        result: set[int] = set()
         for d in self.datasets:
             result = result | d.missing
         return result
anemoi/datasets/data/xy.py
@@ -11,10 +11,6 @@
 import logging
 from functools import cached_property
 from typing import Any
-from typing import Dict
-from typing import List
-from typing import Set
-from typing import Tuple

 from .dataset import Dataset
 from .dataset import FullIndex
@@ -29,7 +25,7 @@ LOG = logging.getLogger(__name__)
 class ZipBase(Combined):
     """Base class for handling zipped datasets."""

-    def __init__(self, datasets: List[Any], check_compatibility: bool = True) -> None:
+    def __init__(self, datasets: list[Any], check_compatibility: bool = True) -> None:
         """Initialize ZipBase with a list of datasets.

         Parameters
@@ -58,7 +54,7 @@ class ZipBase(Combined):
         new_parents = [parent.clone(ds) for ds in self.datasets]
         return self.clone(new_parents)

-    def clone(self, datasets: List[Any]) -> "ZipBase":
+    def clone(self, datasets: list[Any]) -> "ZipBase":
         """Clone the ZipBase with new datasets.

         Parameters
@@ -81,7 +77,11 @@ class ZipBase(Combined):
         Node
             Tree representation of the datasets.
         """
-        return Node(self, [d.tree() for d in self.datasets], check_compatibility=self._check_compatibility)
+        return Node(
+            self,
+            [d.tree() for d in self.datasets],
+            check_compatibility=self._check_compatibility,
+        )

     def __len__(self) -> int:
         """Get the length of the smallest dataset.
@@ -93,7 +93,7 @@ class ZipBase(Combined):
         """
         return min(len(d) for d in self.datasets)

-    def __getitem__(self, n: FullIndex) -> Tuple[Any, ...]:
+    def __getitem__(self, n: FullIndex) -> tuple[Any, ...]:
         """Get the item at the specified index from all datasets.

         Parameters
@@ -145,55 +145,55 @@ class ZipBase(Combined):
         pass

     @cached_property
-    def missing(self) -> Set[int]:
+    def missing(self) -> set[int]:
         """Get the set of missing indices from all datasets."""
-        result: Set[int] = set()
+        result: set[int] = set()
         for d in self.datasets:
             result = result | d.missing
         return result

     @property
-    def shape(self) -> Tuple[Any, ...]:
+    def shape(self) -> tuple[Any, ...]:
         """Get the shape of all datasets."""
         return tuple(d.shape for d in self.datasets)

     @property
-    def field_shape(self) -> Tuple[Any, ...]:
+    def field_shape(self) -> tuple[Any, ...]:
         """Get the field shape of all datasets."""
         return tuple(d.shape for d in self.datasets)

     @property
-    def latitudes(self) -> Tuple[Any, ...]:
+    def latitudes(self) -> tuple[Any, ...]:
         """Get the latitudes of all datasets."""
         return tuple(d.latitudes for d in self.datasets)

     @property
-    def longitudes(self) -> Tuple[Any, ...]:
+    def longitudes(self) -> tuple[Any, ...]:
         """Get the longitudes of all datasets."""
         return tuple(d.longitudes for d in self.datasets)

     @property
-    def dtype(self) -> Tuple[Any, ...]:
+    def dtype(self) -> tuple[Any, ...]:
         """Get the data types of all datasets."""
         return tuple(d.dtype for d in self.datasets)

     @property
-    def grids(self) -> Tuple[Any, ...]:
+    def grids(self) -> tuple[Any, ...]:
         """Get the grids of all datasets."""
         return tuple(d.grids for d in self.datasets)

     @property
-    def statistics(self) -> Tuple[Any, ...]:
+    def statistics(self) -> tuple[Any, ...]:
         """Get the statistics of all datasets."""
         return tuple(d.statistics for d in self.datasets)

     @property
-    def resolution(self) -> Tuple[Any, ...]:
+    def resolution(self) -> tuple[Any, ...]:
         """Get the resolution of all datasets."""
         return tuple(d.resolution for d in self.datasets)

     @property
-    def name_to_index(self) -> Tuple[Any, ...]:
+    def name_to_index(self) -> tuple[Any, ...]:
         """Get the name to index mapping of all datasets."""
         return tuple(d.name_to_index for d in self.datasets)

@@ -210,6 +210,10 @@ class ZipBase(Combined):
         if self._check_compatibility:
             super().check_compatibility(d1, d2)

+    def forwards_subclass_metadata_specific(self) -> dict[str, Any]:
+        """Get the metadata specific to the subclass."""
+        return {}
+

 class Zip(ZipBase):
     """Class for handling zipped datasets."""
@@ -223,7 +227,7 @@ class XY(ZipBase):
     pass


-def xy_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> XY:
+def xy_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> XY:
     """Factory function to create an XY instance.

     Parameters
@@ -256,7 +260,7 @@ def xy_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> XY:
     return XY(datasets, check_compatibility=check_compatibility)._subset(**kwargs)


-def zip_factory(args: Tuple[Any, ...], kwargs: Dict[str, Any]) -> Zip:
+def zip_factory(args: tuple[Any, ...], kwargs: dict[str, Any]) -> Zip:
     """Factory function to create a Zip instance.

     Parameters
anemoi/datasets/dates/__init__.py
@@ -10,15 +10,10 @@

 import datetime
 import warnings
+from collections.abc import Iterator
 from functools import reduce
 from math import gcd
 from typing import Any
-from typing import Dict
-from typing import Iterator
-from typing import List
-from typing import Optional
-from typing import Tuple
-from typing import Union

 # from anemoi.utils.dates import as_datetime
 from anemoi.utils.dates import DateTimes
@@ -29,7 +24,7 @@ from anemoi.utils.hindcasts import HindcastDatesTimes
 from anemoi.utils.humanize import print_dates


-def extend(x: Union[str, List[Any], Tuple[Any, ...]]) -> Iterator[datetime.datetime]:
+def extend(x: str | list[Any] | tuple[Any, ...]) -> Iterator[datetime.datetime]:
     """Extend a date range or list of dates into individual datetime objects.

     Args:
@@ -63,6 +58,8 @@ def extend(x: Union[str, List[Any], Tuple[Any, ...]]) -> Iterator[datetime.datetime]:
 class DatesProvider:
     """Base class for date generation.

+    Examples
+    --------
     >>> DatesProvider.from_config(**{"start": "2023-01-01 00:00", "end": "2023-01-02 00:00", "frequency": "1d"}).values
     [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 2, 0, 0)]

@@ -86,7 +83,7 @@ class DatesProvider:
     3
     """

-    def __init__(self, missing: Optional[List[Union[str, datetime.datetime]]] = None) -> None:
+    def __init__(self, missing: list[str | datetime.datetime] | None = None) -> None:
         """Initialize the DatesProvider with optional missing dates.

         Parameters
@@ -168,7 +165,7 @@ class ValuesDates(DatesProvider):
         **kwargs (Any): Additional arguments.
     """

-    def __init__(self, values: List[Union[str, datetime.datetime]], **kwargs: Any) -> None:
+    def __init__(self, values: list[str | datetime.datetime], **kwargs: Any) -> None:
         """Initialize ValuesDates with a list of values.

         Args:
@@ -188,7 +185,7 @@ class ValuesDates(DatesProvider):
         """
         return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"

-    def as_dict(self) -> Dict[str, Any]:
+    def as_dict(self) -> dict[str, Any]:
         """Convert the ValuesDates instance to a dictionary.

         Returns
@@ -215,9 +212,9 @@ class StartEndDates(DatesProvider):

     def __init__(
         self,
-        start: Union[str, datetime.datetime],
-        end: Union[str, datetime.datetime],
-        frequency: Union[int, str] = 1,
+        start: str | datetime.datetime,
+        end: str | datetime.datetime,
+        frequency: int | str = 1,
         **kwargs: Any,
     ) -> None:
         """Initialize StartEndDates with start, end, and frequency.
@@ -259,7 +256,7 @@ class StartEndDates(DatesProvider):

         super().__init__(missing=missing)

-    def as_dict(self) -> Dict[str, Any]:
+    def as_dict(self) -> dict[str, Any]:
         """Convert the StartEndDates instance to a dictionary.

         Returns
@@ -314,9 +311,9 @@ class HindcastsDates(DatesProvider):

     def __init__(
         self,
-        start: Union[str, List[str]],
-        end: Union[str, List[str]],
-        steps: List[int] = [0],
+        start: str | list[str],
+        end: str | list[str],
+        steps: list[int] = [0],
         years: int = 20,
         **kwargs: Any,
     ) -> None:
@@ -403,7 +400,7 @@ class HindcastsDates(DatesProvider):
         """
         return f"{self.__class__.__name__}({self.values[0]}..{self.values[-1]})"

-    def as_dict(self) -> Dict[str, Any]:
+    def as_dict(self) -> dict[str, Any]:
         """Convert the HindcastsDates instance to a dictionary.

         Returns
anemoi/datasets/dates/groups.py
@@ -12,19 +12,16 @@ import datetime
 import itertools
 from abc import ABC
 from abc import abstractmethod
+from collections.abc import Callable
+from collections.abc import Iterator
 from functools import cached_property
 from typing import Any
-from typing import Callable
-from typing import Iterator
-from typing import List
-from typing import Tuple
-from typing import Union

 from anemoi.datasets.dates import DatesProvider
 from anemoi.datasets.dates import as_datetime


-def _shorten(dates: Union[List[datetime.datetime], Tuple[datetime.datetime, ...]]) -> Union[str, List[str]]:
+def _shorten(dates: list[datetime.datetime] | tuple[datetime.datetime, ...]) -> str | list[str]:
     """Shorten the list of dates for display.

     Args:
@@ -43,7 +40,7 @@ def _shorten(dates: Union[List[datetime.datetime], Tuple[datetime.datetime, ...]]) -> Union[str, List[str]]:
 class GroupOfDates:
     """A class to represent a group of dates."""

-    def __init__(self, dates: List[datetime.datetime], provider: DatesProvider, partial_ok: bool = False) -> None:
+    def __init__(self, dates: list[datetime.datetime], provider: DatesProvider, partial_ok: bool = False) -> None:
         assert isinstance(provider, DatesProvider), type(provider)
         assert isinstance(dates, list)

@@ -197,10 +194,10 @@ class Groups:
 class Filter:
     """A class to filter out missing dates."""

-    def __init__(self, missing: List[datetime.datetime]) -> None:
-        self.missing = set(as_datetime(m) for m in missing)
+    def __init__(self, missing: list[datetime.datetime]) -> None:
+        self.missing = {as_datetime(m) for m in missing}

-    def __call__(self, dates: List[datetime.datetime]) -> List[datetime.datetime]:
+    def __call__(self, dates: list[datetime.datetime]) -> list[datetime.datetime]:
         """Filter out missing dates from the list of dates.

         Args:
anemoi/datasets/dumper.py (new file)
@@ -0,0 +1,76 @@
+# (C) Copyright 2025 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import datetime
+import io
+import logging
+
+import ruamel.yaml
+
+LOG = logging.getLogger(__name__)
+
+
+def represent_date(dumper, data):
+
+    if isinstance(data, datetime.datetime):
+        if data.tzinfo is None:
+            data = data.replace(tzinfo=datetime.timezone.utc)
+        data = data.astimezone(datetime.timezone.utc)
+        iso_str = data.replace(tzinfo=None).isoformat(timespec="seconds") + "Z"
+    else:
+        iso_str = data.isoformat()
+
+    return dumper.represent_scalar("tag:yaml.org,2002:timestamp", iso_str)
+
+
+# --- Represent multiline strings with | style ---
+def represent_multiline_str(dumper, data):
+    if "\n" in data:
+        return dumper.represent_scalar("tag:yaml.org,2002:str", data.strip(), style="|")
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+
+# --- Represent short lists inline (flow style) ---
+def represent_inline_list(dumper, data):
+
+    if not all(isinstance(i, (str, int, float, bool, type(None))) for i in data):
+        return dumper.represent_sequence("tag:yaml.org,2002:seq", data)
+
+    return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
+
+
+def yaml_dump(obj, order=None, stream=None, **kwargs):
+
+    if order:
+
+        def _ordering(k):
+            return order.index(k) if k in order else len(order)
+
+        obj = {k: v for k, v in sorted(obj.items(), key=lambda item: _ordering(item[0]))}
+
+    yaml = ruamel.yaml.YAML()
+    yaml.width = 120  # wrap long flow sequences
+
+    yaml.Representer.add_representer(datetime.date, represent_date)
+    yaml.Representer.add_representer(datetime.datetime, represent_date)
+    yaml.Representer.add_representer(str, represent_multiline_str)
+    yaml.Representer.add_representer(list, represent_inline_list)
+
+    data = ruamel.yaml.comments.CommentedMap()
+    for i, (k, v) in enumerate(obj.items()):
+        data[k] = v
+        if i > 0:
+            data.yaml_set_comment_before_after_key(key=k, before="\n")
+
+    if stream:
+        yaml.dump(data, stream=stream, **kwargs)
+
+    stream = io.StringIO()
+    yaml.dump(data, stream=stream, **kwargs)
+    return stream.getvalue()
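Note: a small usage sketch of the new dumper module shown above (the recipe dictionary and the call are my illustration, not taken from the package). yaml_dump takes a mapping, an optional key order and an optional stream, returns the YAML text, separates top-level keys with blank lines, renders datetimes as UTC timestamps, multiline strings in block (|) style and scalar-only lists in flow style.

# Illustrative only; assumes anemoi-datasets 0.5.28 is installed.
import datetime

from anemoi.datasets.dumper import yaml_dump

recipe = {
    "description": "Toy recipe\nsecond line",           # multiline -> block (|) style
    "dates": {"start": datetime.datetime(2023, 1, 1)},  # datetime -> UTC timestamp
    "variables": ["2t", "10u", "10v"],                   # scalars only -> inline [..] list
}

# Keys are emitted in the requested order; keys not listed in `order` would sort last.
print(yaml_dump(recipe, order=["description", "dates", "variables"]))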