anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -10,9 +10,6 @@
10
10
  import logging
11
11
  from functools import cached_property
12
12
  from typing import Any
13
- from typing import Dict
14
- from typing import List
15
- from typing import Union
16
13
 
17
14
  from earthkit.data import FieldList
18
15
 
@@ -20,7 +17,7 @@ from ...dates.groups import GroupOfDates
20
17
  from .action import Action
21
18
  from .action import action_factory
22
19
  from .misc import _tidy
23
- from .result import Result
20
+ from .result.field import Result
24
21
 
25
22
  LOG = logging.getLogger(__name__)
26
23
 
@@ -31,9 +28,9 @@ class DataSourcesAction(Action):
31
28
  def __init__(
32
29
  self,
33
30
  context: object,
34
- action_path: List[str],
35
- sources: Union[Dict[str, Any], List[Dict[str, Any]]],
36
- input: Dict[str, Any],
31
+ action_path: list[str],
32
+ sources: dict[str, Any] | list[dict[str, Any]],
33
+ input: dict[str, Any],
37
34
  ) -> None:
38
35
  """Initializes a DataSourcesAction instance.
39
36
 
@@ -58,6 +55,7 @@ class DataSourcesAction(Action):
58
55
 
59
56
  self.sources = [action_factory(config, context, ["data_sources"] + [a_path]) for a_path, config in configs]
60
57
  self.input = action_factory(input, context, ["input"])
58
+ self.names = [a_path for a_path, config in configs]
61
59
 
62
60
  def select(self, group_of_dates: GroupOfDates) -> "DataSourcesResult":
63
61
  """Selects the data sources result for the given group of dates.
@@ -93,10 +91,10 @@ class DataSourcesResult(Result):
93
91
  def __init__(
94
92
  self,
95
93
  context: object,
96
- action_path: List[str],
94
+ action_path: list[str],
97
95
  dates: object,
98
96
  input_result: Result,
99
- sources_results: List[Result],
97
+ sources_results: list[Result],
100
98
  ) -> None:
101
99
  """Initializes a DataSourcesResult instance.
102
100
 
@@ -8,11 +8,6 @@
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
10
  import logging
11
- from functools import wraps
12
- from typing import Any
13
- from typing import Callable
14
- from typing import Tuple
15
- from typing import Union
16
11
 
17
12
  from earthkit.data import FieldList
18
13
  from earthkit.data.core.fieldlist import MultiFieldList
@@ -20,75 +15,7 @@ from earthkit.data.core.fieldlist import MultiFieldList
20
15
  LOG = logging.getLogger(__name__)
21
16
 
22
17
 
23
- def parse_function_name(name: str) -> Tuple[str, Union[int, None]]:
24
- """Parses a function name to extract the base name and an optional time delta.
25
-
26
- Parameters
27
- ----------
28
- name : str
29
- The function name to parse.
30
-
31
- Returns
32
- -------
33
- tuple of (str, int or None)
34
- The base name and an optional time delta.
35
- """
36
- if name.endswith("h") and name[:-1].isdigit():
37
-
38
- if "-" in name:
39
- name, delta = name.split("-")
40
- sign = -1
41
-
42
- elif "+" in name:
43
- name, delta = name.split("+")
44
- sign = 1
45
-
46
- else:
47
- return name, None
48
-
49
- assert delta[-1] == "h", (name, delta)
50
- delta = sign * int(delta[:-1])
51
- return name, delta
52
-
53
- return name, None
54
-
55
-
56
- def assert_fieldlist(method: Callable[..., Any]) -> Callable[..., Any]:
57
- """Decorator to assert that the result of a method is an instance of FieldList.
58
-
59
- Parameters
60
- ----------
61
- method : Callable[..., Any]
62
- The method to decorate.
63
-
64
- Returns
65
- -------
66
- Callable[..., Any]
67
- The decorated method.
68
- """
69
-
70
- @wraps(method)
71
- def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
72
-
73
- result = method(self, *args, **kwargs)
74
- assert isinstance(result, FieldList), type(result)
75
- return result
76
-
77
- return wrapper
78
-
79
-
80
- def assert_is_fieldlist(obj: object) -> None:
81
- """Asserts that the given object is an instance of FieldList.
82
-
83
- Parameters
84
- ----------
85
- obj : object
86
- The object to check.
87
- """
88
- assert isinstance(obj, FieldList), type(obj)
89
-
90
-
91
- def _flatten(ds: Union[MultiFieldList, FieldList]) -> list:
18
+ def _flatten(ds: MultiFieldList | FieldList) -> list:
92
19
  """Flattens a MultiFieldList or FieldList into a list of FieldList objects.
93
20
 
94
21
  Parameters
@@ -106,7 +33,7 @@ def _flatten(ds: Union[MultiFieldList, FieldList]) -> list:
106
33
  return [ds]
107
34
 
108
35
 
109
- def _tidy(ds: Union[MultiFieldList, FieldList], indent: int = 0) -> Union[MultiFieldList, FieldList]:
36
+ def _tidy(ds: MultiFieldList | FieldList, indent: int = 0) -> MultiFieldList | FieldList:
110
37
  """Tidies up a MultiFieldList or FieldList by removing empty sources.
111
38
 
112
39
  Parameters
@@ -10,26 +10,13 @@
10
10
 
11
11
  import logging
12
12
  from collections import defaultdict
13
+ from collections.abc import Generator
13
14
  from typing import Any
14
- from typing import Dict
15
- from typing import Generator
16
- from typing import List
17
- from typing import Optional
18
- from typing import Set
19
- from typing import Tuple
20
15
 
21
16
  import numpy as np
22
- from anemoi.transform.fields import new_field_with_valid_datetime
23
- from anemoi.transform.fields import new_fieldlist_from_list
24
17
  from anemoi.utils.dates import as_datetime
25
18
  from anemoi.utils.dates import frequency_to_timedelta
26
19
 
27
- from .action import Action
28
- from .action import action_factory
29
- from .join import JoinResult
30
- from .result import Result
31
- from .trace import trace_select
32
-
33
20
  LOG = logging.getLogger(__name__)
34
21
 
35
22
 
@@ -37,7 +24,7 @@ class DateMapper:
37
24
  """A factory class to create DateMapper instances based on the given mode."""
38
25
 
39
26
  @staticmethod
40
- def from_mode(mode: str, source: Any, config: Dict[str, Any]) -> "DateMapper":
27
+ def from_mode(mode: str, source: Any, config: dict[str, Any]) -> "DateMapper":
41
28
  """Create a DateMapper instance based on the given mode.
42
29
 
43
30
  Parameters
@@ -87,10 +74,10 @@ class DateMapperClosest(DateMapper):
87
74
  self.maximum: Any = frequency_to_timedelta(maximum)
88
75
  self.frequency: Any = frequency_to_timedelta(frequency)
89
76
  self.skip_all_nans: bool = skip_all_nans
90
- self.tried: Set[Any] = set()
91
- self.found: Set[Any] = set()
77
+ self.tried: set[Any] = set()
78
+ self.found: set[Any] = set()
92
79
 
93
- def transform(self, group_of_dates: Any) -> Generator[Tuple[Any, Any], None, None]:
80
+ def transform(self, group_of_dates: Any) -> Generator[tuple[Any, Any], None, None]:
94
81
  """Transform the group of dates to the closest available dates.
95
82
 
96
83
  Parameters
@@ -185,7 +172,7 @@ class DateMapperClosest(DateMapper):
185
172
  class DateMapperClimatology(DateMapper):
186
173
  """A DateMapper implementation that maps dates to specified climatology dates."""
187
174
 
188
- def __init__(self, source: Any, year: int, day: int, hour: Optional[int] = None) -> None:
175
+ def __init__(self, source: Any, year: int, day: int, hour: int | None = None) -> None:
189
176
  """Initialize DateMapperClimatology.
190
177
 
191
178
  Parameters
@@ -201,9 +188,9 @@ class DateMapperClimatology(DateMapper):
201
188
  """
202
189
  self.year: int = year
203
190
  self.day: int = day
204
- self.hour: Optional[int] = hour
191
+ self.hour: int | None = hour
205
192
 
206
- def transform(self, group_of_dates: Any) -> Generator[Tuple[Any, Any], None, None]:
193
+ def transform(self, group_of_dates: Any) -> Generator[tuple[Any, Any], None, None]:
207
194
  """Transform the group of dates to the specified climatology dates.
208
195
 
209
196
  Parameters
@@ -239,7 +226,7 @@ class DateMapperClimatology(DateMapper):
239
226
  class DateMapperConstant(DateMapper):
240
227
  """A DateMapper implementation that maps dates to a constant date."""
241
228
 
242
- def __init__(self, source: Any, date: Optional[Any] = None) -> None:
229
+ def __init__(self, source: Any, date: Any | None = None) -> None:
243
230
  """Initialize DateMapperConstant.
244
231
 
245
232
  Parameters
@@ -250,9 +237,9 @@ class DateMapperConstant(DateMapper):
250
237
  The constant date to map to.
251
238
  """
252
239
  self.source: Any = source
253
- self.date: Optional[Any] = date
240
+ self.date: Any | None = date
254
241
 
255
- def transform(self, group_of_dates: Any) -> Tuple[Any, Any]:
242
+ def transform(self, group_of_dates: Any) -> tuple[Any, Any]:
256
243
  """Transform the group of dates to a constant date.
257
244
 
258
245
  Parameters
@@ -281,109 +268,3 @@ class DateMapperConstant(DateMapper):
281
268
  group_of_dates,
282
269
  )
283
270
  ]
284
-
285
-
286
- class DateMapperResult(Result):
287
- """A Result implementation that updates the valid datetime of the datasource."""
288
-
289
- def __init__(
290
- self,
291
- context: Any,
292
- action_path: List[str],
293
- group_of_dates: Any,
294
- source_result: Any,
295
- mapper: DateMapper,
296
- original_group_of_dates: Any,
297
- ) -> None:
298
- """Initialize DateMapperResult.
299
-
300
- Parameters
301
- ----------
302
- context : Any
303
- The context.
304
- action_path : list of str
305
- The action path.
306
- group_of_dates : Any
307
- The group of dates.
308
- source_result : Any
309
- The source result.
310
- mapper : DateMapper
311
- The date mapper.
312
- original_group_of_dates : Any
313
- The original group of dates.
314
- """
315
- super().__init__(context, action_path, group_of_dates)
316
-
317
- self.source_results: Any = source_result
318
- self.mapper: DateMapper = mapper
319
- self.original_group_of_dates: Any = original_group_of_dates
320
-
321
- @property
322
- def datasource(self) -> Any:
323
- """Get the datasource with updated valid datetime."""
324
- result: list = []
325
-
326
- for field in self.source_results.datasource:
327
- for date in self.original_group_of_dates:
328
- result.append(new_field_with_valid_datetime(field, date))
329
-
330
- if not result:
331
- raise ValueError("repeated_dates: no input data found")
332
-
333
- return new_fieldlist_from_list(result)
334
-
335
-
336
- class RepeatedDatesAction(Action):
337
- """An Action implementation that selects and transforms a group of dates."""
338
-
339
- def __init__(self, context: Any, action_path: List[str], source: Any, mode: str, **kwargs: Any) -> None:
340
- """Initialize RepeatedDatesAction.
341
-
342
- Args:
343
- context (Any): The context.
344
- action_path (List[str]): The action path.
345
- source (Any): The data source.
346
- mode (str): The mode for date mapping.
347
- **kwargs (Any): Additional arguments.
348
- """
349
- super().__init__(context, action_path, source, mode, **kwargs)
350
-
351
- self.source: Any = action_factory(source, context, action_path + ["source"])
352
- self.mapper: DateMapper = DateMapper.from_mode(mode, self.source, kwargs)
353
-
354
- @trace_select
355
- def select(self, group_of_dates: Any) -> JoinResult:
356
- """Select and transform the group of dates.
357
-
358
- Args:
359
- group_of_dates (Any): The group of dates to select.
360
-
361
- Returns
362
- -------
363
- JoinResult
364
- The result of the join operation.
365
- """
366
- results: list = []
367
- for one_date_group, many_dates_group in self.mapper.transform(group_of_dates):
368
- results.append(
369
- DateMapperResult(
370
- self.context,
371
- self.action_path,
372
- one_date_group,
373
- self.source.select(one_date_group),
374
- self.mapper,
375
- many_dates_group,
376
- )
377
- )
378
-
379
- return JoinResult(self.context, self.action_path, group_of_dates, results)
380
-
381
- def __repr__(self) -> str:
382
- """Get the string representation of the action.
383
-
384
- Returns
385
- -------
386
- str
387
- The string representation.
388
- """
389
- return f"MultiDateMatchAction({self.source}, {self.mapper})"
@@ -1,4 +1,4 @@
1
- # (C) Copyright 2024 Anemoi contributors.
1
+ # (C) Copyright 2025 Anemoi contributors.
2
2
  #
3
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
4
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
@@ -6,3 +6,12 @@
6
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
+
10
+ import logging
11
+ from abc import ABC
12
+
13
+ LOG = logging.getLogger(__name__)
14
+
15
+
16
+ class Result(ABC):
17
+ pass