anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +64 -26
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +5 -6
- anemoi/datasets/create/input/__init__.py +44 -65
- anemoi/datasets/create/input/action.py +296 -238
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +7 -9
- anemoi/datasets/create/input/misc.py +2 -75
- anemoi/datasets/create/input/repeated_dates.py +11 -130
- anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
- anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +120 -145
- anemoi/datasets/create/sources/accumulations2.py +20 -53
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -40
- anemoi/datasets/create/sources/empty.py +22 -19
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +29 -29
- anemoi/datasets/create/sources/grib.py +94 -78
- anemoi/datasets/create/sources/grib_index.py +57 -55
- anemoi/datasets/create/sources/hindcasts.py +57 -59
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +121 -149
- anemoi/datasets/create/sources/netcdf.py +28 -25
- anemoi/datasets/create/sources/opendap.py +28 -26
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +46 -48
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -51
- anemoi/datasets/create/sources/tendencies.py +68 -98
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +20 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +28 -25
- anemoi/datasets/create/sources/zenodo.py +43 -41
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +0 -43
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +48 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -168
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +22 -29
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +15 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +4 -185
- anemoi/datasets/schemas/recipe.json +131 -0
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
- anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
- anemoi/datasets/create/filter.py +0 -48
- anemoi/datasets/create/input/concat.py +0 -164
- anemoi/datasets/create/input/context.py +0 -89
- anemoi/datasets/create/input/empty.py +0 -54
- anemoi/datasets/create/input/filter.py +0 -118
- anemoi/datasets/create/input/function.py +0 -233
- anemoi/datasets/create/input/join.py +0 -130
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -177
- anemoi/datasets/create/input/template.py +0 -162
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
|
@@ -7,253 +7,311 @@
|
|
|
7
7
|
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
8
|
# nor does it submit to any jurisdiction.
|
|
9
9
|
|
|
10
|
-
import json
|
|
11
10
|
import logging
|
|
12
|
-
from copy import deepcopy
|
|
13
|
-
from typing import Any
|
|
14
|
-
from typing import Dict
|
|
15
|
-
from typing import List
|
|
16
11
|
|
|
17
|
-
from
|
|
18
|
-
|
|
19
|
-
from ...dates.groups import GroupOfDates
|
|
20
|
-
from .context import Context
|
|
21
|
-
from .template import substitute
|
|
12
|
+
from anemoi.datasets.dates import DatesProvider
|
|
22
13
|
|
|
23
14
|
LOG = logging.getLogger(__name__)
|
|
24
15
|
|
|
25
16
|
|
|
26
17
|
class Action:
|
|
27
|
-
"""
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
The context in which the action exists.
|
|
33
|
-
kwargs : Dict[str, Any]
|
|
34
|
-
Additional keyword arguments.
|
|
35
|
-
args : Any
|
|
36
|
-
Additional positional arguments.
|
|
37
|
-
action_path : List[str]
|
|
38
|
-
The action path.
|
|
18
|
+
"""An "Action" represents a single operation described in the yaml configuration, e.g. a source, a filter,
|
|
19
|
+
pipe, join, etc.
|
|
20
|
+
|
|
21
|
+
See :ref:`operations` for more details.
|
|
22
|
+
|
|
39
23
|
"""
|
|
40
24
|
|
|
41
|
-
def __init__(
|
|
42
|
-
self
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
@classmethod
|
|
75
|
-
def _short_str(cls, x: str) -> str:
|
|
76
|
-
"""Shorten the string representation if it exceeds 1000 characters.
|
|
77
|
-
|
|
78
|
-
Parameters
|
|
79
|
-
----------
|
|
80
|
-
x : str
|
|
81
|
-
The string to shorten.
|
|
82
|
-
|
|
83
|
-
Returns
|
|
84
|
-
-------
|
|
85
|
-
str
|
|
86
|
-
The shortened string.
|
|
87
|
-
"""
|
|
88
|
-
x = str(x)
|
|
89
|
-
if len(x) < 1000:
|
|
90
|
-
return x
|
|
91
|
-
return x[:1000] + "..."
|
|
92
|
-
|
|
93
|
-
def _repr(self, *args: Any, _indent_: str = "\n", _inline_: str = "", **kwargs: Any) -> str:
|
|
94
|
-
"""Generate a string representation of the Action instance.
|
|
95
|
-
|
|
96
|
-
Parameters
|
|
97
|
-
----------
|
|
98
|
-
args : Any
|
|
99
|
-
Additional positional arguments.
|
|
100
|
-
_indent_ : str, optional
|
|
101
|
-
The indentation string, by default "\n".
|
|
102
|
-
_inline_ : str, optional
|
|
103
|
-
The inline string, by default "".
|
|
104
|
-
kwargs : Any
|
|
105
|
-
Additional keyword arguments.
|
|
106
|
-
|
|
107
|
-
Returns
|
|
108
|
-
-------
|
|
109
|
-
str
|
|
110
|
-
The string representation.
|
|
111
|
-
"""
|
|
112
|
-
more = ",".join([str(a)[:5000] for a in args])
|
|
113
|
-
more += ",".join([f"{k}={v}"[:5000] for k, v in kwargs.items()])
|
|
114
|
-
|
|
115
|
-
more = more[:5000]
|
|
116
|
-
txt = f"{self.__class__.__name__}: {_inline_}{_indent_}{more}"
|
|
117
|
-
if _indent_:
|
|
118
|
-
txt = txt.replace("\n", "\n ")
|
|
119
|
-
return txt
|
|
120
|
-
|
|
121
|
-
def __repr__(self) -> str:
|
|
122
|
-
"""Return the string representation of the Action instance.
|
|
123
|
-
|
|
124
|
-
Returns
|
|
125
|
-
-------
|
|
126
|
-
str
|
|
127
|
-
The string representation.
|
|
128
|
-
"""
|
|
129
|
-
return self._repr()
|
|
130
|
-
|
|
131
|
-
def select(self, dates: object, **kwargs: Any) -> None:
|
|
132
|
-
"""Select dates for the action.
|
|
133
|
-
|
|
134
|
-
Parameters
|
|
135
|
-
----------
|
|
136
|
-
dates : object
|
|
137
|
-
The dates to select.
|
|
138
|
-
kwargs : Any
|
|
139
|
-
Additional keyword arguments.
|
|
140
|
-
"""
|
|
141
|
-
self._raise_not_implemented()
|
|
142
|
-
|
|
143
|
-
def _raise_not_implemented(self) -> None:
|
|
144
|
-
"""Raise a NotImplementedError indicating the method is not implemented."""
|
|
145
|
-
raise NotImplementedError(f"Not implemented in {self.__class__.__name__}")
|
|
146
|
-
|
|
147
|
-
def _trace_select(self, group_of_dates: GroupOfDates) -> str:
|
|
148
|
-
"""Trace the selection of a group of dates.
|
|
149
|
-
|
|
150
|
-
Parameters
|
|
151
|
-
----------
|
|
152
|
-
group_of_dates : GroupOfDates
|
|
153
|
-
The group of dates to trace.
|
|
154
|
-
|
|
155
|
-
Returns
|
|
156
|
-
-------
|
|
157
|
-
str
|
|
158
|
-
The trace string.
|
|
159
|
-
"""
|
|
160
|
-
return f"{self.__class__.__name__}({group_of_dates})"
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
class ActionContext(Context):
|
|
164
|
-
"""Represents the context in which an action is performed.
|
|
165
|
-
|
|
166
|
-
Attributes
|
|
167
|
-
----------
|
|
168
|
-
order_by : str
|
|
169
|
-
The order by criteria.
|
|
170
|
-
flatten_grid : bool
|
|
171
|
-
Whether to flatten the grid.
|
|
172
|
-
remapping : Dict[str, Any]
|
|
173
|
-
The remapping configuration.
|
|
174
|
-
use_grib_paramid : bool
|
|
175
|
-
Whether to use GRIB parameter ID.
|
|
25
|
+
def __init__(self, config, *path):
    """Store the action's configuration and its location in the recipe.

    Parameters
    ----------
    config : Any
        The configuration attached to this action; stored as ``self.config``.
    *path : str
        Components locating the action in the recipe; stored as the tuple
        ``self.path``. The first component must be ``"input"`` or
        ``"data_sources"``.

    Raises
    ------
    AssertionError
        If ``path`` is empty or does not start with ``"input"`` or
        ``"data_sources"``.
    """
    self.config = config
    self.path = path
    # Test for emptiness first so that an empty path fails with the descriptive
    # message below rather than with an IndexError on ``path[0]``.
    assert path and path[0] in (
        "input",
        "data_sources",
    ), f"{self.__class__.__name__}: path must start with 'input' or 'data_sources': {path}"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Concat(Action):
    """Concatenate the results of several actions, each restricted to its own dates.

    The configuration is a list of items. Each item holds a ``dates`` entry,
    which is passed to ``DatesProvider.from_config``, plus exactly one other
    key describing the action that delivers the fields for those dates.

    See :ref:`building-concat` for more details.

    .. code-block:: yaml

        input:
          concat:
            - dates:
                start: 2023-01-01
                end: 2023-01-31
                frequency: 1d
              action: # some action
                 ...

            - dates:
                start: 2023-02-01
                end: 2023-02-28
                frequency: 1d
              action: # some action

    """

    def __init__(self, config, *path):
        """Build one ``(dates, action)`` pair per item of ``config``.

        Parameters
        ----------
        config : list
            The list of items described in the class docstring.
        *path : str
            Path of the parent; ``"concat"`` is appended to it.
        """
        super().__init__(config, *path, "concat")

        assert isinstance(config, list), f"Value of Concat Action must be a list, got: {config}"

        self.choices = []

        for i, item in enumerate(config):
            filtering_dates = DatesProvider.from_config(**item["dates"])
            # Everything except "dates" is the description of the child action
            action = action_factory({k: v for k, v in item.items() if k != "dates"}, *self.path, str(i))
            self.choices.append((filtering_dates, action))

    def __repr__(self):
        return f"Concat({self.choices})"

    def __call__(self, context, argument):
        """Run each child action on the dates it is responsible for and accumulate the results.

        Children for which ``context.matching_dates`` returns an empty
        selection are skipped. The accumulated result is registered under
        ``self.path`` and returned.
        """
        results = context.empty_result()

        for filtering_dates, action in self.choices:
            dates = context.matching_dates(filtering_dates, argument)
            if len(dates) == 0:
                continue
            results += action(context, dates)

        return context.register(results, self.path)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class Join(Action):
    """Combine the results of several actions, all called with the same argument.

    See :ref:`building-join` for more details.

    .. code-block:: yaml

        input:
          join:
            - grib:
                ...

            - netcdf: # some other action
                ...

    """

    def __init__(self, config, *path):
        """Create one child action per entry of the ``config`` list."""
        super().__init__(config, *path, "join")

        assert isinstance(config, list), f"Value of Join Action must be a list, got: {config}"

        children = []
        for index, entry in enumerate(config):
            # Each child is located by its position in the list
            children.append(action_factory(entry, *self.path, str(index)))
        self.actions = children

    def __repr__(self):
        return f"Join({self.actions})"

    def __call__(self, context, argument):
        """Call every child with ``argument``, accumulate and register the results."""
        combined = context.empty_result()

        for child in self.actions:
            combined += child(context, argument)

        return context.register(combined, self.path)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class Pipe(Action):
    """Chain actions: the first receives the argument, each later one the previous result.

    See :ref:`building-pipe` for more details.

    .. code-block:: yaml

        input:
          pipe:
            - grib:
                ...

            - rename:
                ...

    """

    def __init__(self, config, *path):
        """Create one child action per entry of the ``config`` list."""
        assert isinstance(config, list), f"Value of Pipe Action must be a list, got {config}"
        super().__init__(config, *path, "pipe")

        steps = []
        for index, entry in enumerate(config):
            steps.append(action_factory(entry, *self.path, str(index)))
        self.actions = steps

    def __repr__(self):
        return f"Pipe({self.actions})"

    def __call__(self, context, argument):
        """Run the chain and register its final result under ``self.path``.

        With no child actions, the context's empty result is registered.
        """
        result = context.empty_result()

        feed = argument  # what the next step receives
        for step in self.actions:
            result = step(context, feed)
            feed = result

        return context.register(result, self.path)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class Function(Action):
    """Base class for sources and filters.

    Subclasses are expected to provide a ``name`` attribute and the
    ``create_object`` / ``call_object`` methods used by ``__call__``.
    """

    def __init__(self, config, *path):
        # The function's own name is the last component of its path
        super().__init__(config, *path, self.name)

    def __call__(self, context, argument):
        """Create the underlying object, call it and register what it returns."""
        # Substitute the ${} variables in the config
        resolved = context.resolve(self.config)

        # TODO: find a better way to pass the function name along
        resolved["_type"] = self.name

        target = self.create_object(context, resolved)
        outcome = self.call_object(context, target, argument)

        return context.register(outcome, self.path)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class DatasetSourceMixin:
    """Mixin class for sources defined in anemoi-datasets"""

    def create_object(self, context, config):
        """Create the source object through ``anemoi.datasets.create.sources.create_source``."""
        from anemoi.datasets.create.sources import create_source

        return create_source(context, config)

    def call_object(self, context, source, argument):
        """Execute ``source`` on the argument as prepared by ``context.source_argument``."""
        prepared = context.source_argument(argument)
        return source.execute(prepared)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class TransformSourceMixin:
    """Mixin class for sources defined in anemoi-transform"""

    # NOTE(review): unlike the other two mixins, this one defines no
    # ``call_object`` although ``Function.__call__`` calls it - confirm
    # whether that is intended or a missing method.

    def create_object(self, context, config):
        """Create the source object through ``anemoi.transform.sources.create_source``."""
        from anemoi.transform.sources import create_source

        return create_source(context, config)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
class TransformFilterMixin:
    """Mixin class for filters defined in anemoi-transform"""

    def create_object(self, context, config):
        """Create the filter object through ``anemoi.transform.filters.create_filter``."""
        from anemoi.transform.filters import create_filter

        return create_filter(context, config)

    def call_object(self, context, filter, argument):
        """Apply ``filter.forward`` to the argument as prepared by ``context.filter_argument``."""
        prepared = context.filter_argument(argument)
        return filter.forward(prepared)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class FilterFunction(Function):
    """Action to call a filter on the argument (e.g. rename, regrid, etc.)."""

    def __call__(self, context, argument):
        # NOTE(review): neither ``Function`` nor ``Action`` defines ``call`` in
        # the visible code - confirm it exists, or that this class is unused.
        prepare = context.filter_argument
        return self.call(context, argument, prepare)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _make_name(name, what):
|
|
221
|
+
name = name.replace("_", "-")
|
|
222
|
+
name = "".join(x.title() for x in name.split("-"))
|
|
223
|
+
return name + what.title()
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def new_source(name, mixin):
    """Create a ``Function`` subclass for the source called ``name``.

    The class derives from ``Function`` and ``mixin`` and carries ``name`` as a
    class attribute.
    """
    class_name = _make_name(name, "source")
    bases = (Function, mixin)
    namespace = {"name": name}
    return type(class_name, bases, namespace)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def new_filter(name, mixin):
    """Create a ``Function`` subclass for the filter called ``name``.

    The class derives from ``Function`` and ``mixin`` and carries ``name`` as a
    class attribute.
    """
    class_name = _make_name(name, "filter")
    bases = (Function, mixin)
    namespace = {"name": name}
    return type(class_name, bases, namespace)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class DataSources(Action):
    """Action holding a collection of sources, given as a dict or as a list.

    Calling it runs every source and registers each result in the context
    under ``self.path + (name,)``, where ``name`` is the dict key or, for a
    list, the position of the source as a string.
    """

    def __init__(self, config, *path):
        """Create one child action per entry of ``config``.

        Parameters
        ----------
        config : dict or list
            The sources, either named (dict) or positional (list).
        *path : str
            Path of this action.
        """
        super().__init__(config, *path)
        assert isinstance(config, (dict, list)), f"Invalid config type: {type(config)}"
        if isinstance(config, dict):
            self.sources = {k: action_factory(v, *path, k) for k, v in config.items()}
        else:
            # Key by str(i), like the child's own path, so that the path used in
            # __call__ contains only strings and can match a "${a.b.c}" reference.
            self.sources = {str(i): action_factory(v, *path, str(i)) for i, v in enumerate(config)}

    def __call__(self, context, argument):
        """Run every source with ``argument`` and register its result; returns None."""
        for name, source in self.sources.items():
            context.register(source(context, argument), self.path + (name,))
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class Recipe(Action):
    """Action that represents a recipe (i.e. a sequence of data_sources and input)."""

    def __init__(self, input, data_sources):
        # Both are stored as given and are called like actions in __call__
        self.data_sources = data_sources
        self.input = input

    def __call__(self, context, argument):
        """Run the data sources first, then return the result of the input."""
        # Load data_sources; the return value is not used
        self.data_sources(context, argument)

        outcome = self.input(context, argument)
        return outcome
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# Maps a recipe key (hyphenated form) to the Action class that implements it.
KLASS = {
    "concat": Concat,
    "join": Join,
    "pipe": Pipe,
    "data-sources": DataSources,
}

# Number of built-in entries: while len(KLASS) still equals this value, the
# sources and filters from the registries have not been added yet.
LEN_KLASS = len(KLASS)


def _register_plugins(names, factory, mixin):
    """Add one generated Action class per name to ``KLASS`` without replacing existing entries."""
    for name in names:
        # Compare the same (hyphenated) form that is used as the key, otherwise
        # an underscore name never matches and overwrites an earlier entry.
        key = name.replace("_", "-")
        if key not in KLASS:
            KLASS[key] = factory(name, mixin)


def make(key, config, *path):
    """Instantiate the Action registered under ``key``.

    On the first call, the sources and filters known to the registries are
    added to ``KLASS``. Entries registered earlier take precedence: built-in
    actions, then anemoi-datasets sources, then anemoi-transform sources,
    then anemoi-transform filters.

    Parameters
    ----------
    key : str
        Name of the action; underscores and hyphens are equivalent.
    config : Any
        Configuration passed to the Action constructor.
    *path : str
        Path passed to the Action constructor.

    Raises
    ------
    KeyError
        If no action is registered under ``key``.
    """
    if LEN_KLASS == len(KLASS):

        # Load plugins
        from anemoi.transform.filters import filter_registry as transform_filter_registry
        from anemoi.transform.sources import source_registry as transform_source_registry

        from anemoi.datasets.create.sources import source_registry as dataset_source_registry

        # Register sources, local first
        _register_plugins(dataset_source_registry.registered, new_source, DatasetSourceMixin)
        _register_plugins(transform_source_registry.registered, new_source, TransformSourceMixin)

        # Register filters
        _register_plugins(transform_filter_registry.registered, new_filter, TransformFilterMixin)

    return KLASS[key.replace("_", "-")](config, *path)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def action_factory(data, *path):
    """Create the Action described by a single-entry dictionary.

    ``data`` must be a dict with exactly one item: its key names the action and
    its value is the action's configuration. ``path`` must be non-empty and
    start with ``"input"`` or ``"data_sources"``. Any violation raises an
    ``AssertionError``.
    """
    assert len(path) > 0, f"Path must contain at least one element {path}"
    assert path[0] in ("input", "data_sources")

    assert isinstance(data, dict), f"Input data must be a dictionary, got {type(data)}"
    assert len(data) == 1, f"Input data must contain exactly one key-value pair {data} {'.'.join(path)}"

    # Exactly one item is guaranteed by the assertion above
    ((key, value),) = data.items()
    return make(key, value, *path)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# (C) Copyright 2025 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from abc import ABC
|
|
12
|
+
from abc import abstractmethod
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
LOG = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Context(ABC):
    """Context for building input data.

    It keeps the results registered by the actions, indexed by their path, so
    that later actions can refer to them with ``${a.b.c}`` values.
    """

    def __init__(self, /, argument: Any) -> None:
        self.results = {}  # registered data, keyed by path tuple
        self.cache = {}
        self.argument = argument

    def trace(self, emoji, *message) -> None:
        """Print ``emoji`` followed by the tuple of message parts."""
        print(f"{emoji}: {message}")

    def register(self, data: Any, path: list[str]) -> Any:
        """Store ``data`` under ``path`` and return it; an empty path stores nothing."""
        if path:
            assert path[0] in ("input", "data_sources"), path

            LOG.info(f"Registering data at path: {path}")
            self.results[tuple(path)] = data

        return data

    def resolve(self, config):
        """Return a shallow copy of ``config`` with ``${a.b.c}`` values replaced.

        Only top-level string values of the exact form ``${...}`` are replaced,
        by the data registered under the dotted path they contain.

        Raises
        ------
        KeyError
            If a referenced path has not been registered.
        """
        resolved = config.copy()

        for key, value in config.items():
            is_reference = isinstance(value, str) and value.startswith("${") and value.endswith("}")
            if not is_reference:
                continue

            path = tuple(value[2:-1].split("."))
            if path not in self.results:
                LOG.warning(f"Path not found {path}")
                for p in sorted(self.results):
                    LOG.info(f"   Available paths: {p}")
                raise KeyError(f"Path {path} not found in results: {self.results.keys()}")

            resolved[key] = self.results[path]

        return resolved

    def create_source(self, config: Any, *path) -> Any:
        """Return a callable for ``config``: an Action for a dict, else a constant function."""
        from anemoi.datasets.create.input.action import action_factory

        if isinstance(config, dict):
            return action_factory(config, *path)

        # It is already a result (e.g. ekd.FieldList), loaded from ${a.b.c}
        # TODO: something more elegant
        return lambda *args, **kwargs: config

    @abstractmethod
    def empty_result(self) -> Any: ...

    @abstractmethod
    def create_result(self, data: Any) -> Any: ...
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# (C) Copyright 2025 Anemoi contributors.
|
|
2
|
+
#
|
|
3
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
4
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
5
|
+
#
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from earthkit.data.core.order import build_remapping
|
|
14
|
+
|
|
15
|
+
from ..result.field import FieldResult
|
|
16
|
+
from . import Context
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FieldContext(Context):
    """Context used when the input is built from fields."""

    def __init__(
        self,
        /,
        argument: Any,
        order_by: str,
        flatten_grid: bool,
        remapping: dict[str, Any],
        use_grib_paramid: bool,
    ) -> None:
        super().__init__(argument)
        self.partial_ok = False
        self.order_by = order_by
        self.flatten_grid = flatten_grid
        self.use_grib_paramid = use_grib_paramid
        # The remapping configuration is converted once, with earthkit's build_remapping
        self.remapping = build_remapping(remapping)

    def empty_result(self) -> Any:
        """Return the earthkit-data "empty" source."""
        import earthkit.data as ekd

        return ekd.from_source("empty")

    def source_argument(self, argument: Any) -> Any:
        """Return the argument to give to a source (passed through unchanged)."""
        return argument  # .dates

    def filter_argument(self, argument: Any) -> Any:
        """Return the argument to give to a filter (passed through unchanged)."""
        return argument

    def create_result(self, data):
        """Wrap ``data`` in a ``FieldResult`` bound to this context."""
        return FieldResult(self, data)

    def matching_dates(self, filtering_dates, group_of_dates: Any) -> Any:
        """Return the sorted dates common to both arguments as a new ``GroupOfDates``.

        The new group reuses the provider of ``group_of_dates``.
        """
        from anemoi.datasets.dates.groups import GroupOfDates

        common = set(group_of_dates).intersection(set(filtering_dates))
        return GroupOfDates(sorted(common), group_of_dates.provider)
|