anemoi-datasets 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. anemoi/datasets/__init__.py +1 -2
  2. anemoi/datasets/_version.py +16 -3
  3. anemoi/datasets/commands/check.py +1 -1
  4. anemoi/datasets/commands/copy.py +1 -2
  5. anemoi/datasets/commands/create.py +1 -1
  6. anemoi/datasets/commands/inspect.py +27 -35
  7. anemoi/datasets/commands/recipe/__init__.py +93 -0
  8. anemoi/datasets/commands/recipe/format.py +55 -0
  9. anemoi/datasets/commands/recipe/migrate.py +555 -0
  10. anemoi/datasets/commands/validate.py +59 -0
  11. anemoi/datasets/compute/recentre.py +3 -6
  12. anemoi/datasets/create/__init__.py +64 -26
  13. anemoi/datasets/create/check.py +10 -12
  14. anemoi/datasets/create/chunks.py +1 -2
  15. anemoi/datasets/create/config.py +5 -6
  16. anemoi/datasets/create/input/__init__.py +44 -65
  17. anemoi/datasets/create/input/action.py +296 -238
  18. anemoi/datasets/create/input/context/__init__.py +71 -0
  19. anemoi/datasets/create/input/context/field.py +54 -0
  20. anemoi/datasets/create/input/data_sources.py +7 -9
  21. anemoi/datasets/create/input/misc.py +2 -75
  22. anemoi/datasets/create/input/repeated_dates.py +11 -130
  23. anemoi/datasets/{utils → create/input/result}/__init__.py +10 -1
  24. anemoi/datasets/create/input/{result.py → result/field.py} +36 -120
  25. anemoi/datasets/create/input/trace.py +1 -1
  26. anemoi/datasets/create/patch.py +1 -2
  27. anemoi/datasets/create/persistent.py +3 -5
  28. anemoi/datasets/create/size.py +1 -3
  29. anemoi/datasets/create/sources/accumulations.py +120 -145
  30. anemoi/datasets/create/sources/accumulations2.py +20 -53
  31. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  32. anemoi/datasets/create/sources/constants.py +39 -40
  33. anemoi/datasets/create/sources/empty.py +22 -19
  34. anemoi/datasets/create/sources/fdb.py +133 -0
  35. anemoi/datasets/create/sources/forcings.py +29 -29
  36. anemoi/datasets/create/sources/grib.py +94 -78
  37. anemoi/datasets/create/sources/grib_index.py +57 -55
  38. anemoi/datasets/create/sources/hindcasts.py +57 -59
  39. anemoi/datasets/create/sources/legacy.py +10 -62
  40. anemoi/datasets/create/sources/mars.py +121 -149
  41. anemoi/datasets/create/sources/netcdf.py +28 -25
  42. anemoi/datasets/create/sources/opendap.py +28 -26
  43. anemoi/datasets/create/sources/patterns.py +4 -6
  44. anemoi/datasets/create/sources/recentre.py +46 -48
  45. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  46. anemoi/datasets/create/sources/source.py +26 -51
  47. anemoi/datasets/create/sources/tendencies.py +68 -98
  48. anemoi/datasets/create/sources/xarray.py +4 -6
  49. anemoi/datasets/create/sources/xarray_support/__init__.py +40 -36
  50. anemoi/datasets/create/sources/xarray_support/coordinates.py +8 -12
  51. anemoi/datasets/create/sources/xarray_support/field.py +20 -16
  52. anemoi/datasets/create/sources/xarray_support/fieldlist.py +11 -15
  53. anemoi/datasets/create/sources/xarray_support/flavour.py +42 -42
  54. anemoi/datasets/create/sources/xarray_support/grid.py +15 -9
  55. anemoi/datasets/create/sources/xarray_support/metadata.py +19 -128
  56. anemoi/datasets/create/sources/xarray_support/patch.py +4 -6
  57. anemoi/datasets/create/sources/xarray_support/time.py +10 -13
  58. anemoi/datasets/create/sources/xarray_support/variable.py +21 -21
  59. anemoi/datasets/create/sources/xarray_zarr.py +28 -25
  60. anemoi/datasets/create/sources/zenodo.py +43 -41
  61. anemoi/datasets/create/statistics/__init__.py +3 -6
  62. anemoi/datasets/create/testing.py +4 -0
  63. anemoi/datasets/create/typing.py +1 -2
  64. anemoi/datasets/create/utils.py +0 -43
  65. anemoi/datasets/create/zarr.py +7 -2
  66. anemoi/datasets/data/__init__.py +15 -6
  67. anemoi/datasets/data/complement.py +7 -12
  68. anemoi/datasets/data/concat.py +5 -8
  69. anemoi/datasets/data/dataset.py +48 -47
  70. anemoi/datasets/data/debug.py +7 -9
  71. anemoi/datasets/data/ensemble.py +4 -6
  72. anemoi/datasets/data/fill_missing.py +7 -10
  73. anemoi/datasets/data/forwards.py +22 -26
  74. anemoi/datasets/data/grids.py +12 -168
  75. anemoi/datasets/data/indexing.py +9 -12
  76. anemoi/datasets/data/interpolate.py +7 -15
  77. anemoi/datasets/data/join.py +8 -12
  78. anemoi/datasets/data/masked.py +6 -11
  79. anemoi/datasets/data/merge.py +5 -9
  80. anemoi/datasets/data/misc.py +41 -45
  81. anemoi/datasets/data/missing.py +11 -16
  82. anemoi/datasets/data/observations/__init__.py +8 -14
  83. anemoi/datasets/data/padded.py +3 -5
  84. anemoi/datasets/data/records/backends/__init__.py +2 -2
  85. anemoi/datasets/data/rescale.py +5 -12
  86. anemoi/datasets/data/rolling_average.py +141 -0
  87. anemoi/datasets/data/select.py +13 -16
  88. anemoi/datasets/data/statistics.py +4 -7
  89. anemoi/datasets/data/stores.py +22 -29
  90. anemoi/datasets/data/subset.py +8 -11
  91. anemoi/datasets/data/unchecked.py +7 -11
  92. anemoi/datasets/data/xy.py +25 -21
  93. anemoi/datasets/dates/__init__.py +15 -18
  94. anemoi/datasets/dates/groups.py +7 -10
  95. anemoi/datasets/dumper.py +76 -0
  96. anemoi/datasets/grids.py +4 -185
  97. anemoi/datasets/schemas/recipe.json +131 -0
  98. anemoi/datasets/testing.py +93 -7
  99. anemoi/datasets/validate.py +598 -0
  100. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +7 -4
  101. anemoi_datasets-0.5.28.dist-info/RECORD +134 -0
  102. anemoi/datasets/create/filter.py +0 -48
  103. anemoi/datasets/create/input/concat.py +0 -164
  104. anemoi/datasets/create/input/context.py +0 -89
  105. anemoi/datasets/create/input/empty.py +0 -54
  106. anemoi/datasets/create/input/filter.py +0 -118
  107. anemoi/datasets/create/input/function.py +0 -233
  108. anemoi/datasets/create/input/join.py +0 -130
  109. anemoi/datasets/create/input/pipe.py +0 -66
  110. anemoi/datasets/create/input/step.py +0 -177
  111. anemoi/datasets/create/input/template.py +0 -162
  112. anemoi_datasets-0.5.26.dist-info/RECORD +0 -131
  113. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  114. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  115. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  116. {anemoi_datasets-0.5.26.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
anemoi/datasets/commands/recipe/migrate.py
@@ -0,0 +1,555 @@
+ # (C) Copyright 2024 Anemoi contributors.
+ #
+ # This software is licensed under the terms of the Apache Licence Version 2.0
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ #
+ # In applying this licence, ECMWF does not waive the privileges and immunities
+ # granted to it by virtue of its status as an intergovernmental organisation
+ # nor does it submit to any jurisdiction.
+
+
+ import logging
+ import sys
+ from collections.abc import Sequence
+ from typing import Any
+
+ from glom import assign
+ from glom import delete
+ from glom import glom
+
+ from anemoi.datasets.create import validate_config
+ from anemoi.datasets.dumper import yaml_dump
+
+ LOG = logging.getLogger(__name__)
+
+
+ def find_paths(data, target_key=None, target_value=None, *path):
+
+     matches = []
+
+     if isinstance(data, dict):
+         for k, v in data.items():
+             if (target_key is not None and k == target_key) or (target_value is not None and v == target_value):
+                 matches.append(list(path) + [k])
+             matches.extend(find_paths(v, target_key, target_value, *path, k))
+     elif isinstance(data, Sequence) and not isinstance(data, (str, bytes)):
+         for i, item in enumerate(data):
+             matches.extend(find_paths(item, target_key, target_value, *path, str(i)))
+     return matches
+
+
+ def find_chevrons(data, *path):
+
+     matches = []
+
+     if isinstance(data, dict):
+         for k, v in data.items():
+             if k == "<<":
+                 matches.append(list(path) + [k])
+             matches.extend(find_chevrons(v, *path, k))
+     elif isinstance(data, list):
+         for i, item in enumerate(data):
+             matches.extend(find_chevrons(item, *path, str(i)))
+     return matches
+
+
+ def find_paths_in_substrees(path, obj, cur_path=None):
+     if cur_path is None:
+         cur_path = []
+     matches = []
+     try:
+         glom(obj, path) # just to check existence
+         matches.append(cur_path + path.split("."))
+     except Exception:
+         pass
+
+     if isinstance(obj, dict):
+         for k, v in obj.items():
+             matches.extend(find_paths_in_substrees(path, v, cur_path + [k]))
+     elif isinstance(obj, list):
+         for i, v in enumerate(obj):
+             matches.extend(find_paths_in_substrees(path, v, cur_path + [str(i)]))
+     return matches
+
+
+ MIGRATE = {
+     "output.statistics_end": "statistics.end",
+     "has_nans": "statistics.allow_nans",
+     "loop.dates.group_by": "build.group_by",
+     "loop.0.dates.group_by": "build.group_by",
+     "loop.dates": "dates",
+     "loop.0.dates": "dates",
+     "copyright": "attribution",
+     "dates.<<": "dates",
+     "options.group_by": "build.group_by",
+     "loops.0.loop_a.dates": "dates",
+     "loop.0.loop_a.dates": "dates",
+     "dates.stop": "dates.end",
+     "dates.group_by": "build.group_by",
+     "include.mars": "data_sources.mars.mars",
+     "ensemble_dimension": "build.ensemble_dimension",
+     "flatten_grid": "build.flatten_grid",
+ }
+
+ DELETE = [
+     "purpose",
+     # "input.join.0.label",
+     "status",
+     "common",
+     "config_format_version",
+     "aliases",
+     # "platform",
+     "loops.0.loop_a.applies_to",
+     "loop.0.loop_a.applies_to",
+     "dataset_status",
+     "alias",
+     "resources",
+     "input.dates.<<",
+     "input.dates.join.0.label.name",
+ ]
+
+
+ SOURCES = {
+     "oper-accumulations": "accumulations",
+     "era5-accumulations": "accumulations",
+     "ensemble-perturbations": "recentre",
+     "ensemble_perturbations": "recentre",
+     "perturbations": "recentre",
+     "custom-regrid": "regrid",
+ }
+
+ MARKER = object()
+
+
+ def _delete(config, path):
+     x = glom(config, path, default=MARKER)
+     if x is MARKER:
+         return
+     delete(config, path)
+
+
+ def _move(config, path, new_path, result):
+     x = glom(config, path, default=MARKER)
+     if x is MARKER:
+         return
+     delete(result, path)
+     assign(result, new_path, x, missing=dict)
+
+
+ def _fix_input_0(config):
+     if isinstance(config["input"], dict):
+         return
+
+     input = config["input"]
+     new_input = []
+
+     blocks = {}
+     first = None
+     for block in input:
+         assert isinstance(block, dict), block
+
+         assert len(block) == 1, block
+
+         block_name, values = list(block.items())[0]
+
+         if "kwargs" in values:
+             inherit = values.pop("inherit", None)
+             assert len(values) == 1, values
+             values = values["kwargs"]
+             values.pop("date", None)
+             source_name = values.pop("name", None)
+
+             if inherit is not None:
+                 if inherit.startswith("$"):
+                     inherit = inherit[1:]
+                 inherited = blocks[inherit].copy()
+                 inherited.update(values)
+                 values = inherited
+
+             if first is None:
+                 first = source_name
+
+             blocks[block_name] = values.copy()
+
+             new_input.append({SOURCES.get(source_name, source_name): values.copy()})
+         else:
+             assert False, f"Block {block_name} does not have 'kwargs': {values}"
+
+         blocks[block_name] = values.copy()
+
+     config["input"] = dict(join=new_input)
+
+
+ def _fix_input_1(result, config):
+     if isinstance(config["input"], dict):
+         return
+
+     input = config["input"]
+     join = []
+     for k in input:
+         assert isinstance(k, dict)
+         assert len(k) == 1, f"Input key {k} is not a string: {input}"
+         name, values = list(k.items())[0]
+         join.append(values)
+
+     result["input"] = {"join": join}
+     config["input"] = result["input"].copy()
+
+
+ def remove_empties(config: dict) -> None:
+     """Remove empty dictionaries and lists from the config."""
+     if isinstance(config, dict):
+         keys_to_delete = [k for k, v in config.items() if v in (None, {}, [], [{}])]
+
+         for k in keys_to_delete:
+             del config[k]
+
+         for k, v in config.items():
+             remove_empties(v)
+
+     if isinstance(config, list):
+         for item in config:
+             remove_empties(item)
+
+
+ def _fix_loops(result: dict, config: dict) -> None:
+     if "loops" not in config:
+         return
+
+     input = config["input"]
+     loops = config["loops"]
+
+     assert isinstance(loops, list), loops
+     assert isinstance(input, list), input
+
+     entries = {}
+     dates_block = None
+     for loop in loops:
+         assert isinstance(loop, dict), loop
+         assert len(loop) == 1, loop
+         loop = list(loop.values())[0]
+         applies_to = loop["applies_to"]
+         dates = loop["dates"]
+         assert isinstance(applies_to, list), (applies_to, loop)
+         for a in applies_to:
+             entries[a] = dates.copy()
+
+         if "start" in dates:
+             start = dates["start"]
+         else:
+             start = max(dates["values"])
+
+         if "end" in dates or "stop" in dates:
+             end = dates.get("end", dates.get("stop"))
+         else:
+             end = min(dates["values"])
+
+         if dates_block is None:
+             dates_block = {
+                 "start": start,
+                 "end": end,
+             }
+
+         if "frequency" in dates:
+             if "frequency" not in dates_block:
+                 dates_block["frequency"] = dates["frequency"]
+             else:
+                 assert dates_block["frequency"] == dates["frequency"], (dates_block["frequency"], dates["frequency"])
+
+         dates_block["start"] = min(dates_block["start"], start)
+         dates_block["end"] = max(dates_block["end"], end)
+
+     concat = []
+     result["input"] = {"concat": concat}
+
+     print("Found loops:", entries)
+
+     for block in input:
+         assert isinstance(block, dict), block
+         assert len(block) == 1, block
+         name, values = list(block.items())[0]
+         assert name in entries, f"Loop {name} not found in loops: {list(entries.keys())}"
+         dates = entries[name].copy()
+
+         assert "kwargs" not in values
+
+         concat.append(dict(dates=dates, **values))
+
+     d = concat[0]["dates"]
+     if all(c["dates"] == d for c in concat):
+         join = []
+         for c in concat:
+             del c["dates"]
+             join.append(c)
+         result["input"] = {"join": join}
+
+     del config["loops"]
+     config["input"] = result["input"].copy()
+     config["dates"] = dates_block.copy()
+     del result["loops"]
+     result["dates"] = dates_block
+
+
+ def _fix_other(result: dict, config: dict) -> None:
+     paths = find_paths(config, target_key="source_or_dataset", target_value="$previous_data")
+     for p in paths:
+         print(f"Fixing {'.'.join(p)}")
+         assign(result, ".".join(p[:-1] + ["template"]), "${input.join.0.mars}", missing=dict)
+         delete(result, ".".join(p))
+
+     paths = find_paths(config, target_key="date", target_value="$dates")
+     for p in paths:
+         delete(result, ".".join(p))
+
+
+ def _fix_join(result: dict, config: dict) -> None:
+     print("Fixing join...")
+     input = config["input"]
+     if "dates" in input and "join" in input["dates"]:
+         result["input"]["join"] = input["dates"]["join"]
+         config["input"]["join"] = input["dates"]["join"].copy()
+
+     if "join" not in input:
+         return
+
+     join = input["join"]
+     new_join = []
+     for j in join:
+         assert isinstance(j, dict)
+         assert len(j) == 1
+
+         key, values = list(j.items())[0]
+
+         if key not in ("label", "source"):
+             return
+
+         assert isinstance(values, dict), f"Join values for {key} should be a dict: {values}"
+         if key == "label":
+             j = values
+             j.pop("name")
+             key, values = list(j.items())[0]
+
+         print(values)
+         source_name = values.pop("name", "mars")
+         new_join.append(
+             {
+                 SOURCES.get(source_name, source_name): values,
+             }
+         )
+
+     result["input"] = {"join": new_join}
+     config["input"] = result["input"].copy()
+
+
+ def _fix_sources(config: dict, what) -> None:
+
+     input = config["input"]
+     if what not in input:
+         return
+
+     join = input[what]
+     new_join = []
+     for j in join:
+         assert isinstance(j, dict)
+         assert len(j) == 1, j
+
+         key, values = list(j.items())[0]
+
+         key = SOURCES.get(key, key)
+
+         new_join.append(
+             {
+                 key: values,
+             }
+         )
+
+     config["input"][what] = new_join
+     config["input"][what] = new_join.copy()
+
+
+ def _assign(config, path, value):
+     print(f"Assign {path} {value}")
+     assign(config, path, value)
+
+
+ def _fix_chevrons(result: dict, config: dict) -> None:
+     print("Fixing chevrons...")
+     paths = find_chevrons(config)
+     for p in paths:
+         a = glom(config, ".".join(p))
+         b = glom(config, ".".join(p[:-1]))
+         delete(result, ".".join(p))
+         a.update(b)
+         assign(result, ".".join(p[:-1]), a)
+
+
+ def _fix_some(config: dict) -> None:
+
+     paths = find_paths_in_substrees("label.function", config)
+     for p in paths:
+         parent = glom(config, ".".join(p[:-2]))
+         node = glom(config, ".".join(p[:-1]))
+         assert node
+         _assign(config, ".".join(p[:-2]), node)
+
+     paths = find_paths_in_substrees("constants.source_or_dataset", config)
+     for p in paths:
+         node = glom(config, ".".join(p[:-1]))
+         node["template"] = node.pop("source_or_dataset")
+         if node["template"] == "$previous_data":
+             node["template"] = "${input.join.0.mars}"
+     paths = find_paths_in_substrees("constants.template", config)
+     for p in paths:
+         node = glom(config, ".".join(p[:-1]))
+         if node["template"] == "$pl_data":
+             node["template"] = "${input.join.0.mars}"
+     for d in ("date", "dates", "time"):
+         paths = find_paths_in_substrees(d, config)
+         for p in paths:
+             if len(p) > 1:
+                 node = glom(config, ".".join(p[:-1]))
+                 if isinstance(node, dict) and isinstance(node[d], str) and node[d].startswith("$"):
+                     del node[d]
+
+     paths = find_paths_in_substrees("source.<<", config)
+     for p in paths:
+         parent = glom(config, ".".join(p[:-2]))
+         node = glom(config, ".".join(p[:-1]))
+         node.update(node.pop("<<"))
+         parent[node.pop("name")] = node
+         assert len(parent) == 2
+         del parent["source"]
+
+     paths = find_paths_in_substrees("label.mars", config)
+     for p in paths:
+         parent = glom(config, ".".join(p[:-2]))
+         node = glom(config, ".".join(p[:-1]))
+         assert node
+         assign(config, ".".join(p[:-2]), node)
+
+     paths = find_paths_in_substrees("input.dates.join", config)
+     for p in paths:
+         node = glom(config, ".".join(p))
+         config["input"]["join"] = node
+         del config["input"]["dates"]
+
+     paths = find_paths_in_substrees("source.name", config)
+     for p in paths:
+         parent = glom(config, ".".join(p[:-2]))
+         node = glom(config, ".".join(p[:-1]))
+         name = node.pop("name")
+         assign(config, ".".join(p[:-2]), {name: node})
+
+     paths = find_paths_in_substrees("function.name", config)
+     for p in paths:
+         parent = glom(config, ".".join(p[:-2]))
+         node = glom(config, ".".join(p[:-1]))
+         name = node.pop("name")
+         assert node
+         assign(config, ".".join(p[:-2]), {name: node})
+
+
+ def _migrate(config: dict, n) -> dict:
+
+     result = config.copy()
+
+     _fix_input_0(result)
+     # _fix_loops(result, config)
+     # _fix_input_1(result, config)
+     # _fix_join(result, config)
+     # _fix_chevrons(result, config)
+     # _fix_other(result, config)
+
+     for k, v in MIGRATE.items():
+         _move(config, k, v, result)
+
+     _fix_some(result)
+     _fix_sources(result, "join")
+
+     for k in DELETE:
+         _delete(result, k)
+
+     remove_empties(result)
+
+     return result
+
+
+ def migrate(old: dict) -> dict:
+
+     for i in range(10):
+         new = _migrate(old, i)
+         if new == old:
+             return new
+         old = new
+
+     return new
+
+
+ def has_key(config, key: str) -> bool:
+     if isinstance(config, dict):
+         if key in config:
+             return True
+         for k, v in config.items():
+             if has_key(v, key):
+                 return True
+     if isinstance(config, list):
+         for item in config:
+             if has_key(item, key):
+                 return True
+     return False
+
+
+ def has_value(config, value: str) -> bool:
+     if isinstance(config, dict):
+         for k, v in config.items():
+             if v == value:
+                 return True
+             if has_value(v, value):
+                 return True
+
+     if isinstance(config, list):
+         for item in config:
+             if item == value:
+                 return True
+             if has_value(item, value):
+                 return True
+     return config == value
+
+
+ def check(config):
+
+     try:
+
+         validate_config(config)
+         assert config.get("input", {})
+         assert config.get("dates", {})
+         assert not has_key(config, "label")
+         assert not has_key(config, "kwargs")
+         assert not has_value(config, "$previous_data")
+         assert not has_value(config, "$pl_data")
+         assert not has_value(config, "$dates")
+         assert not has_key(config, "inherit")
+         assert not has_key(config, "source_or_dataset")
+         assert not has_key(config, "<<")
+
+         for n in SOURCES.keys():
+             assert not has_key(config, n), f"Source {n} found in config. Please update to {SOURCES[n]}."
+
+     except Exception as e:
+         print("Validation failed:")
+         print(e)
+         print(yaml_dump(config))
+         sys.exit(1)
+
+
+ def migrate_recipe(args: Any, config) -> None:
+
+     print(f"Migrating {args.path}")
+
+     migrated = migrate(config)
+
+     check(migrated)
+     if migrated == config:
+         return None
+
+     return migrated
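
A minimal usage sketch for the migration helpers above, assuming a recipe loaded with PyYAML; the driver itself and the file name old-recipe.yaml are illustrative, not part of the package:

# Illustrative driver, assuming PyYAML is available; "old-recipe.yaml" is a placeholder path.
import yaml

from anemoi.datasets.commands.recipe.migrate import check, migrate

with open("old-recipe.yaml") as f:
    old = yaml.safe_load(f)

new = migrate(old)  # re-runs _migrate until the config stops changing (at most 10 passes)
check(new)          # validate_config plus the assertions above; exits the process on failure
print(new)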
anemoi/datasets/commands/validate.py
@@ -0,0 +1,59 @@
+ # (C) Copyright 2025 Anemoi contributors.
+ #
+ # This software is licensed under the terms of the Apache Licence Version 2.0
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+ #
+ # In applying this licence, ECMWF does not waive the privileges and immunities
+ # granted to it by virtue of its status as an intergovernmental organisation
+ # nor does it submit to any jurisdiction.
+ import importlib
+ import logging
+ from typing import Any
+
+ from anemoi.datasets.validate import validate_dataset
+
+ from . import Command
+
+ LOG = logging.getLogger(__name__)
+
+ DEFAULT_DATASET = "aifs-ea-an-oper-0001-mars-o96-1979-2023-6h-v8"
+
+
+ class Validate(Command):
+     """Command to validate an anemoi dataset."""
+
+     def add_arguments(self, command_parser: Any) -> None:
+         """Add arguments to the command parser.
+
+         Parameters
+         ----------
+         command_parser : Any
+             The command parser.
+         """
+
+         command_parser.add_argument("--callable", metavar="DATASET", default="anemoi.datasets.open_dataset")
+         command_parser.add_argument("--costly-checks", action="store_true", help="Run costly checks")
+         command_parser.add_argument("--detailed", action="store_true", help="Give detailed report")
+         command_parser.add_argument("path", metavar="DATASET")
+
+     def run(self, args: Any) -> None:
+         """Run the command.
+
+         Parameters
+         ----------
+         args : Any
+             The command arguments.
+         """
+
+         module_path, func_name = args.callable.rsplit(".", 1)
+         module = importlib.import_module(module_path)
+         callable_func = getattr(module, func_name)
+
+         if args.path == "default":
+             args.path = DEFAULT_DATASET
+
+         dataset = callable_func(args.path)
+         validate_dataset(dataset, costly_checks=args.costly_checks, detailed=args.detailed)
+
+
+ command = Validate
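
For comparison, a hedged sketch of running the same validation from Python rather than through the Validate command; the dataset path is a placeholder:

# Illustrative only; "/path/to/dataset.zarr" is a placeholder.
from anemoi.datasets import open_dataset
from anemoi.datasets.validate import validate_dataset

ds = open_dataset("/path/to/dataset.zarr")  # same default callable used by the command
validate_dataset(ds, costly_checks=False, detailed=True)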
anemoi/datasets/compute/recentre.py
@@ -10,9 +10,6 @@

  import logging
  from typing import Any
- from typing import Dict
- from typing import Optional
- from typing import Tuple

  import numpy as np
  from earthkit.data.core.temporary import temp_file
@@ -36,7 +33,7 @@ SKIP = ("class", "stream", "type", "number", "expver", "_leg_number", "anoffset"


  def check_compatible(
-     f1: Any, f2: Any, centre_field_as_mars: Dict[str, Any], ensemble_field_as_mars: Dict[str, Any]
+     f1: Any, f2: Any, centre_field_as_mars: dict[str, Any], ensemble_field_as_mars: dict[str, Any]
  ) -> None:
      """Check if two fields are compatible.

@@ -75,9 +72,9 @@ def recentre(
      *,
      members: Any,
      centre: Any,
-     clip_variables: Tuple[str, ...] = CLIP_VARIABLES,
+     clip_variables: tuple[str, ...] = CLIP_VARIABLES,
      alpha: float = 1.0,
-     output: Optional[str] = None,
+     output: str | None = None,
  ) -> Any:
      """Recentre ensemble members around the centre field.