anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- anemoi/datasets/__init__.py +1 -2
- anemoi/datasets/_version.py +16 -3
- anemoi/datasets/commands/check.py +1 -1
- anemoi/datasets/commands/copy.py +1 -2
- anemoi/datasets/commands/create.py +1 -1
- anemoi/datasets/commands/inspect.py +27 -35
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/commands/validate.py +59 -0
- anemoi/datasets/compute/recentre.py +3 -6
- anemoi/datasets/create/__init__.py +64 -26
- anemoi/datasets/create/check.py +10 -12
- anemoi/datasets/create/chunks.py +1 -2
- anemoi/datasets/create/config.py +5 -6
- anemoi/datasets/create/input/__init__.py +44 -65
- anemoi/datasets/create/input/action.py +296 -238
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +7 -9
- anemoi/datasets/create/input/misc.py +2 -75
- anemoi/datasets/create/input/repeated_dates.py +11 -130
- anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
- anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
- anemoi/datasets/create/input/trace.py +1 -1
- anemoi/datasets/create/patch.py +1 -2
- anemoi/datasets/create/persistent.py +3 -5
- anemoi/datasets/create/size.py +1 -3
- anemoi/datasets/create/sources/accumulations.py +120 -145
- anemoi/datasets/create/sources/accumulations2.py +20 -53
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -40
- anemoi/datasets/create/sources/empty.py +22 -19
- anemoi/datasets/create/sources/fdb.py +133 -0
- anemoi/datasets/create/sources/forcings.py +29 -29
- anemoi/datasets/create/sources/grib.py +94 -78
- anemoi/datasets/create/sources/grib_index.py +57 -55
- anemoi/datasets/create/sources/hindcasts.py +57 -59
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +121 -149
- anemoi/datasets/create/sources/netcdf.py +28 -25
- anemoi/datasets/create/sources/opendap.py +28 -26
- anemoi/datasets/create/sources/patterns.py +4 -6
- anemoi/datasets/create/sources/recentre.py +46 -48
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -51
- anemoi/datasets/create/sources/tendencies.py +68 -98
- anemoi/datasets/create/sources/xarray.py +4 -6
- anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
- anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
- anemoi/datasets/create/sources/xarray_support/field.py +20 -16
- anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
- anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
- anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
- anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
- anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
- anemoi/datasets/create/sources/xarray_support/time.py +10 -13
- anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
- anemoi/datasets/create/sources/xarray_zarr.py +28 -25
- anemoi/datasets/create/sources/zenodo.py +43 -41
- anemoi/datasets/create/statistics/__init__.py +3 -6
- anemoi/datasets/create/testing.py +4 -0
- anemoi/datasets/create/typing.py +1 -2
- anemoi/datasets/create/utils.py +0 -43
- anemoi/datasets/create/zarr.py +7 -2
- anemoi/datasets/data/__init__.py +15 -6
- anemoi/datasets/data/complement.py +7 -12
- anemoi/datasets/data/concat.py +5 -8
- anemoi/datasets/data/dataset.py +48 -47
- anemoi/datasets/data/debug.py +7 -9
- anemoi/datasets/data/ensemble.py +4 -6
- anemoi/datasets/data/fill_missing.py +7 -10
- anemoi/datasets/data/forwards.py +22 -26
- anemoi/datasets/data/grids.py +12 -168
- anemoi/datasets/data/indexing.py +9 -12
- anemoi/datasets/data/interpolate.py +7 -15
- anemoi/datasets/data/join.py +8 -12
- anemoi/datasets/data/masked.py +6 -11
- anemoi/datasets/data/merge.py +5 -9
- anemoi/datasets/data/misc.py +41 -45
- anemoi/datasets/data/missing.py +11 -16
- anemoi/datasets/data/observations/__init__.py +8 -14
- anemoi/datasets/data/padded.py +3 -5
- anemoi/datasets/data/records/backends/__init__.py +2 -2
- anemoi/datasets/data/rescale.py +5 -12
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/select.py +13 -16
- anemoi/datasets/data/statistics.py +4 -7
- anemoi/datasets/data/stores.py +22 -29
- anemoi/datasets/data/subset.py +8 -11
- anemoi/datasets/data/unchecked.py +7 -11
- anemoi/datasets/data/xy.py +25 -21
- anemoi/datasets/dates/__init__.py +15 -18
- anemoi/datasets/dates/groups.py +7 -10
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +4 -185
- anemoi/datasets/schemas/recipe.json +131 -0
- anemoi/datasets/testing.py +93 -7
- anemoi/datasets/validate.py +598 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
- anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
- anemoi/datasets/create/filter.py +0 -48
- anemoi/datasets/create/input/concat.py +0 -164
- anemoi/datasets/create/input/context.py +0 -89
- anemoi/datasets/create/input/empty.py +0 -54
- anemoi/datasets/create/input/filter.py +0 -118
- anemoi/datasets/create/input/function.py +0 -233
- anemoi/datasets/create/input/join.py +0 -130
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -177
- anemoi/datasets/create/input/template.py +0 -162
- anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
anemoi/datasets/commands/recipe/migrate.py (new file):

```diff
@@ -0,0 +1,555 @@
+# (C) Copyright 2024 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+
+import logging
+import sys
+from collections.abc import Sequence
+from typing import Any
+
+from glom import assign
+from glom import delete
+from glom import glom
+
+from anemoi.datasets.create import validate_config
+from anemoi.datasets.dumper import yaml_dump
+
+LOG = logging.getLogger(__name__)
+
+
+def find_paths(data, target_key=None, target_value=None, *path):
+
+    matches = []
+
+    if isinstance(data, dict):
+        for k, v in data.items():
+            if (target_key is not None and k == target_key) or (target_value is not None and v == target_value):
+                matches.append(list(path) + [k])
+            matches.extend(find_paths(v, target_key, target_value, *path, k))
+    elif isinstance(data, Sequence) and not isinstance(data, (str, bytes)):
+        for i, item in enumerate(data):
+            matches.extend(find_paths(item, target_key, target_value, *path, str(i)))
+    return matches
+
+
+def find_chevrons(data, *path):
+
+    matches = []
+
+    if isinstance(data, dict):
+        for k, v in data.items():
+            if k == "<<":
+                matches.append(list(path) + [k])
+            matches.extend(find_chevrons(v, *path, k))
+    elif isinstance(data, list):
+        for i, item in enumerate(data):
+            matches.extend(find_chevrons(item, *path, str(i)))
+    return matches
+
+
+def find_paths_in_substrees(path, obj, cur_path=None):
+    if cur_path is None:
+        cur_path = []
+    matches = []
+    try:
+        glom(obj, path)  # just to check existence
+        matches.append(cur_path + path.split("."))
+    except Exception:
+        pass
+
+    if isinstance(obj, dict):
+        for k, v in obj.items():
+            matches.extend(find_paths_in_substrees(path, v, cur_path + [k]))
+    elif isinstance(obj, list):
+        for i, v in enumerate(obj):
+            matches.extend(find_paths_in_substrees(path, v, cur_path + [str(i)]))
+    return matches
+
+
+MIGRATE = {
+    "output.statistics_end": "statistics.end",
+    "has_nans": "statistics.allow_nans",
+    "loop.dates.group_by": "build.group_by",
+    "loop.0.dates.group_by": "build.group_by",
+    "loop.dates": "dates",
+    "loop.0.dates": "dates",
+    "copyright": "attribution",
+    "dates.<<": "dates",
+    "options.group_by": "build.group_by",
+    "loops.0.loop_a.dates": "dates",
+    "loop.0.loop_a.dates": "dates",
+    "dates.stop": "dates.end",
+    "dates.group_by": "build.group_by",
+    "include.mars": "data_sources.mars.mars",
+    "ensemble_dimension": "build.ensemble_dimension",
+    "flatten_grid": "build.flatten_grid",
+}
+
+DELETE = [
+    "purpose",
+    # "input.join.0.label",
+    "status",
+    "common",
+    "config_format_version",
+    "aliases",
+    # "platform",
+    "loops.0.loop_a.applies_to",
+    "loop.0.loop_a.applies_to",
+    "dataset_status",
+    "alias",
+    "resources",
+    "input.dates.<<",
+    "input.dates.join.0.label.name",
+]
+
+
+SOURCES = {
+    "oper-accumulations": "accumulations",
+    "era5-accumulations": "accumulations",
+    "ensemble-perturbations": "recentre",
+    "ensemble_perturbations": "recentre",
+    "perturbations": "recentre",
+    "custom-regrid": "regrid",
+}
+
+MARKER = object()
+
+
+def _delete(config, path):
+    x = glom(config, path, default=MARKER)
+    if x is MARKER:
+        return
+    delete(config, path)
+
+
+def _move(config, path, new_path, result):
+    x = glom(config, path, default=MARKER)
+    if x is MARKER:
+        return
+    delete(result, path)
+    assign(result, new_path, x, missing=dict)
+
+
+def _fix_input_0(config):
+    if isinstance(config["input"], dict):
+        return
+
+    input = config["input"]
+    new_input = []
+
+    blocks = {}
+    first = None
+    for block in input:
+        assert isinstance(block, dict), block
+
+        assert len(block) == 1, block
+
+        block_name, values = list(block.items())[0]
+
+        if "kwargs" in values:
+            inherit = values.pop("inherit", None)
+            assert len(values) == 1, values
+            values = values["kwargs"]
+            values.pop("date", None)
+            source_name = values.pop("name", None)
+
+            if inherit is not None:
+                if inherit.startswith("$"):
+                    inherit = inherit[1:]
+                inherited = blocks[inherit].copy()
+                inherited.update(values)
+                values = inherited
+
+            if first is None:
+                first = source_name
+
+            blocks[block_name] = values.copy()
+
+            new_input.append({SOURCES.get(source_name, source_name): values.copy()})
+        else:
+            assert False, f"Block {block_name} does not have 'kwargs': {values}"
+
+        blocks[block_name] = values.copy()
+
+    config["input"] = dict(join=new_input)
+
+
+def _fix_input_1(result, config):
+    if isinstance(config["input"], dict):
+        return
+
+    input = config["input"]
+    join = []
+    for k in input:
+        assert isinstance(k, dict)
+        assert len(k) == 1, f"Input key {k} is not a string: {input}"
+        name, values = list(k.items())[0]
+        join.append(values)
+
+    result["input"] = {"join": join}
+    config["input"] = result["input"].copy()
+
+
+def remove_empties(config: dict) -> None:
+    """Remove empty dictionaries and lists from the config."""
+    if isinstance(config, dict):
+        keys_to_delete = [k for k, v in config.items() if v in (None, {}, [], [{}])]
+
+        for k in keys_to_delete:
+            del config[k]
+
+        for k, v in config.items():
+            remove_empties(v)
+
+    if isinstance(config, list):
+        for item in config:
+            remove_empties(item)
+
+
+def _fix_loops(result: dict, config: dict) -> None:
+    if "loops" not in config:
+        return
+
+    input = config["input"]
+    loops = config["loops"]
+
+    assert isinstance(loops, list), loops
+    assert isinstance(input, list), input
+
+    entries = {}
+    dates_block = None
+    for loop in loops:
+        assert isinstance(loop, dict), loop
+        assert len(loop) == 1, loop
+        loop = list(loop.values())[0]
+        applies_to = loop["applies_to"]
+        dates = loop["dates"]
+        assert isinstance(applies_to, list), (applies_to, loop)
+        for a in applies_to:
+            entries[a] = dates.copy()
+
+        if "start" in dates:
+            start = dates["start"]
+        else:
+            start = max(dates["values"])
+
+        if "end" in dates or "stop" in dates:
+            end = dates.get("end", dates.get("stop"))
+        else:
+            end = min(dates["values"])
+
+        if dates_block is None:
+            dates_block = {
+                "start": start,
+                "end": end,
+            }
+
+        if "frequency" in dates:
+            if "frequency" not in dates_block:
+                dates_block["frequency"] = dates["frequency"]
+            else:
+                assert dates_block["frequency"] == dates["frequency"], (dates_block["frequency"], dates["frequency"])
+
+        dates_block["start"] = min(dates_block["start"], start)
+        dates_block["end"] = max(dates_block["end"], end)
+
+    concat = []
+    result["input"] = {"concat": concat}
+
+    print("Found loops:", entries)
+
+    for block in input:
+        assert isinstance(block, dict), block
+        assert len(block) == 1, block
+        name, values = list(block.items())[0]
+        assert name in entries, f"Loop {name} not found in loops: {list(entries.keys())}"
+        dates = entries[name].copy()
+
+        assert "kwargs" not in values
+
+        concat.append(dict(dates=dates, **values))
+
+    d = concat[0]["dates"]
+    if all(c["dates"] == d for c in concat):
+        join = []
+        for c in concat:
+            del c["dates"]
+            join.append(c)
+        result["input"] = {"join": join}
+
+    del config["loops"]
+    config["input"] = result["input"].copy()
+    config["dates"] = dates_block.copy()
+    del result["loops"]
+    result["dates"] = dates_block
+
+
+def _fix_other(result: dict, config: dict) -> None:
+    paths = find_paths(config, target_key="source_or_dataset", target_value="$previous_data")
+    for p in paths:
+        print(f"Fixing {'.'.join(p)}")
+        assign(result, ".".join(p[:-1] + ["template"]), "${input.join.0.mars}", missing=dict)
+        delete(result, ".".join(p))
+
+    paths = find_paths(config, target_key="date", target_value="$dates")
+    for p in paths:
+        delete(result, ".".join(p))
+
+
+def _fix_join(result: dict, config: dict) -> None:
+    print("Fixing join...")
+    input = config["input"]
+    if "dates" in input and "join" in input["dates"]:
+        result["input"]["join"] = input["dates"]["join"]
+        config["input"]["join"] = input["dates"]["join"].copy()
+
+    if "join" not in input:
+        return
+
+    join = input["join"]
+    new_join = []
+    for j in join:
+        assert isinstance(j, dict)
+        assert len(j) == 1
+
+        key, values = list(j.items())[0]
+
+        if key not in ("label", "source"):
+            return
+
+        assert isinstance(values, dict), f"Join values for {key} should be a dict: {values}"
+        if key == "label":
+            j = values
+            j.pop("name")
+            key, values = list(j.items())[0]
+
+        print(values)
+        source_name = values.pop("name", "mars")
+        new_join.append(
+            {
+                SOURCES.get(source_name, source_name): values,
+            }
+        )
+
+    result["input"] = {"join": new_join}
+    config["input"] = result["input"].copy()
+
+
+def _fix_sources(config: dict, what) -> None:
+
+    input = config["input"]
+    if what not in input:
+        return
+
+    join = input[what]
+    new_join = []
+    for j in join:
+        assert isinstance(j, dict)
+        assert len(j) == 1, j
+
+        key, values = list(j.items())[0]
+
+        key = SOURCES.get(key, key)
+
+        new_join.append(
+            {
+                key: values,
+            }
+        )
+
+    config["input"][what] = new_join
+    config["input"][what] = new_join.copy()
+
+
+def _assign(config, path, value):
+    print(f"Assign {path} {value}")
+    assign(config, path, value)
+
+
+def _fix_chevrons(result: dict, config: dict) -> None:
+    print("Fixing chevrons...")
+    paths = find_chevrons(config)
+    for p in paths:
+        a = glom(config, ".".join(p))
+        b = glom(config, ".".join(p[:-1]))
+        delete(result, ".".join(p))
+        a.update(b)
+        assign(result, ".".join(p[:-1]), a)
+
+
+def _fix_some(config: dict) -> None:
+
+    paths = find_paths_in_substrees("label.function", config)
+    for p in paths:
+        parent = glom(config, ".".join(p[:-2]))
+        node = glom(config, ".".join(p[:-1]))
+        assert node
+        _assign(config, ".".join(p[:-2]), node)
+
+    paths = find_paths_in_substrees("constants.source_or_dataset", config)
+    for p in paths:
+        node = glom(config, ".".join(p[:-1]))
+        node["template"] = node.pop("source_or_dataset")
+        if node["template"] == "$previous_data":
+            node["template"] = "${input.join.0.mars}"
+    paths = find_paths_in_substrees("constants.template", config)
+    for p in paths:
+        node = glom(config, ".".join(p[:-1]))
+        if node["template"] == "$pl_data":
+            node["template"] = "${input.join.0.mars}"
+    for d in ("date", "dates", "time"):
+        paths = find_paths_in_substrees(d, config)
+        for p in paths:
+            if len(p) > 1:
+                node = glom(config, ".".join(p[:-1]))
+                if isinstance(node, dict) and isinstance(node[d], str) and node[d].startswith("$"):
+                    del node[d]
+
+    paths = find_paths_in_substrees("source.<<", config)
+    for p in paths:
+        parent = glom(config, ".".join(p[:-2]))
+        node = glom(config, ".".join(p[:-1]))
+        node.update(node.pop("<<"))
+        parent[node.pop("name")] = node
+        assert len(parent) == 2
+        del parent["source"]
+
+    paths = find_paths_in_substrees("label.mars", config)
+    for p in paths:
+        parent = glom(config, ".".join(p[:-2]))
+        node = glom(config, ".".join(p[:-1]))
+        assert node
+        assign(config, ".".join(p[:-2]), node)
+
+    paths = find_paths_in_substrees("input.dates.join", config)
+    for p in paths:
+        node = glom(config, ".".join(p))
+        config["input"]["join"] = node
+        del config["input"]["dates"]
+
+    paths = find_paths_in_substrees("source.name", config)
+    for p in paths:
+        parent = glom(config, ".".join(p[:-2]))
+        node = glom(config, ".".join(p[:-1]))
+        name = node.pop("name")
+        assign(config, ".".join(p[:-2]), {name: node})
+
+    paths = find_paths_in_substrees("function.name", config)
+    for p in paths:
+        parent = glom(config, ".".join(p[:-2]))
+        node = glom(config, ".".join(p[:-1]))
+        name = node.pop("name")
+        assert node
+        assign(config, ".".join(p[:-2]), {name: node})
+
+
+def _migrate(config: dict, n) -> dict:
+
+    result = config.copy()
+
+    _fix_input_0(result)
+    # _fix_loops(result, config)
+    # _fix_input_1(result, config)
+    # _fix_join(result, config)
+    # _fix_chevrons(result, config)
+    # _fix_other(result, config)
+
+    for k, v in MIGRATE.items():
+        _move(config, k, v, result)
+
+    _fix_some(result)
+    _fix_sources(result, "join")
+
+    for k in DELETE:
+        _delete(result, k)
+
+    remove_empties(result)
+
+    return result
+
+
+def migrate(old: dict) -> dict:
+
+    for i in range(10):
+        new = _migrate(old, i)
+        if new == old:
+            return new
+        old = new
+
+    return new
+
+
+def has_key(config, key: str) -> bool:
+    if isinstance(config, dict):
+        if key in config:
+            return True
+        for k, v in config.items():
+            if has_key(v, key):
+                return True
+    if isinstance(config, list):
+        for item in config:
+            if has_key(item, key):
+                return True
+    return False
+
+
+def has_value(config, value: str) -> bool:
+    if isinstance(config, dict):
+        for k, v in config.items():
+            if v == value:
+                return True
+            if has_value(v, value):
+                return True
+
+    if isinstance(config, list):
+        for item in config:
+            if item == value:
+                return True
+            if has_value(item, value):
+                return True
+    return config == value
+
+
+def check(config):
+
+    try:
+
+        validate_config(config)
+        assert config.get("input", {})
+        assert config.get("dates", {})
+        assert not has_key(config, "label")
+        assert not has_key(config, "kwargs")
+        assert not has_value(config, "$previous_data")
+        assert not has_value(config, "$pl_data")
+        assert not has_value(config, "$dates")
+        assert not has_key(config, "inherit")
+        assert not has_key(config, "source_or_dataset")
+        assert not has_key(config, "<<")
+
+        for n in SOURCES.keys():
+            assert not has_key(config, n), f"Source {n} found in config. Please update to {SOURCES[n]}."
+
+    except Exception as e:
+        print("Validation failed:")
+        print(e)
+        print(yaml_dump(config))
+        sys.exit(1)
+
+
+def migrate_recipe(args: Any, config) -> None:
+
+    print(f"Migrating {args.path}")
+
+    migrated = migrate(config)
+
+    check(migrated)
+    if migrated == config:
+        return None
+
+    return migrated
```
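For orientation, here is a minimal sketch of driving the new migration helpers directly, outside the `recipe migrate` command. `migrate()`, `check()` and `yaml_dump()` come from the code above; the recipe path and the use of PyYAML's `safe_load` to read the old recipe are assumptions.

```python
# Hypothetical driver for the migration helpers added in this release.
import yaml  # assumption: PyYAML is available for reading the old recipe

from anemoi.datasets.commands.recipe.migrate import check, migrate
from anemoi.datasets.dumper import yaml_dump

with open("old-recipe.yaml") as f:  # hypothetical recipe file
    old = yaml.safe_load(f)

# migrate() re-applies the MIGRATE/DELETE/SOURCES rules (at most 10 passes)
# until the configuration stops changing.
new = migrate(old)
check(new)             # validates and exits with an error report on failure
print(yaml_dump(new))  # dump the migrated recipe as YAML
```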
anemoi/datasets/commands/validate.py (new file):

```diff
@@ -0,0 +1,59 @@
+# (C) Copyright 2025 Anemoi contributors.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+import importlib
+import logging
+from typing import Any
+
+from anemoi.datasets.validate import validate_dataset
+
+from . import Command
+
+LOG = logging.getLogger(__name__)
+
+DEFAULT_DATASET = "aifs-ea-an-oper-0001-mars-o96-1979-2023-6h-v8"
+
+
+class Validate(Command):
+    """Command to validate an anemoi dataset."""
+
+    def add_arguments(self, command_parser: Any) -> None:
+        """Add arguments to the command parser.
+
+        Parameters
+        ----------
+        command_parser : Any
+            The command parser.
+        """
+
+        command_parser.add_argument("--callable", metavar="DATASET", default="anemoi.datasets.open_dataset")
+        command_parser.add_argument("--costly-checks", action="store_true", help="Run costly checks")
+        command_parser.add_argument("--detailed", action="store_true", help="Give detailed report")
+        command_parser.add_argument("path", metavar="DATASET")
+
+    def run(self, args: Any) -> None:
+        """Run the command.
+
+        Parameters
+        ----------
+        args : Any
+            The command arguments.
+        """
+
+        module_path, func_name = args.callable.rsplit(".", 1)
+        module = importlib.import_module(module_path)
+        callable_func = getattr(module, func_name)
+
+        if args.path == "default":
+            args.path = DEFAULT_DATASET
+
+        dataset = callable_func(args.path)
+        validate_dataset(dataset, costly_checks=args.costly_checks, detailed=args.detailed)
+
+
+command = Validate
```
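The new `validate` command resolves `--callable` (default `anemoi.datasets.open_dataset`), opens the dataset given by `path` (or `DEFAULT_DATASET` when `path` is `default`), and passes it to `validate_dataset`. A minimal sketch of doing the same programmatically, reusing the command's own defaults:

```python
# Programmatic equivalent of validating the default dataset with a detailed
# report; the dataset name is DEFAULT_DATASET from the command above.
from anemoi.datasets import open_dataset
from anemoi.datasets.validate import validate_dataset

dataset = open_dataset("aifs-ea-an-oper-0001-mars-o96-1979-2023-6h-v8")
validate_dataset(dataset, costly_checks=False, detailed=True)
```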
anemoi/datasets/compute/recentre.py:

```diff
@@ -10,9 +10,6 @@
 
 import logging
 from typing import Any
-from typing import Dict
-from typing import Optional
-from typing import Tuple
 
 import numpy as np
 from earthkit.data.core.temporary import temp_file
@@ -36,7 +33,7 @@ SKIP = ("class", "stream", "type", "number", "expver", "_leg_number", "anoffset"
 
 
 def check_compatible(
-    f1: Any, f2: Any, centre_field_as_mars: Dict[str, Any], ensemble_field_as_mars: Dict[str, Any]
+    f1: Any, f2: Any, centre_field_as_mars: dict[str, Any], ensemble_field_as_mars: dict[str, Any]
 ) -> None:
     """Check if two fields are compatible.
 
@@ -75,9 +72,9 @@ def recentre(
     *,
     members: Any,
     centre: Any,
-    clip_variables: Tuple[str, ...] = CLIP_VARIABLES,
+    clip_variables: tuple[str, ...] = CLIP_VARIABLES,
     alpha: float = 1.0,
-    output: Optional[str] = None,
+    output: str | None = None,
 ) -> Any:
     """Recentre ensemble members around the centre field.
 
```
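The `recentre.py` change only modernises the type hints (builtin `dict`/`tuple` and `str | None` replace `typing.Dict`/`Tuple`/`Optional`); the keyword-only call signature is otherwise unchanged. A small illustrative wrapper, assuming the inputs are earthkit-data field lists as elsewhere in the module:

```python
# Illustrative wrapper around the updated recentre() signature; the argument
# names match the diff, while the input field lists are assumptions.
from typing import Any

from anemoi.datasets.compute.recentre import recentre


def recentre_members(members: Any, centre: Any, alpha: float = 1.0) -> Any:
    """Recentre ensemble members around a centre field list."""
    return recentre(members=members, centre=centre, alpha=alpha, output=None)
```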