anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/recipe/__init__.py +93 -0
  3. anemoi/datasets/commands/recipe/format.py +55 -0
  4. anemoi/datasets/commands/recipe/migrate.py +555 -0
  5. anemoi/datasets/create/__init__.py +42 -1
  6. anemoi/datasets/create/config.py +2 -0
  7. anemoi/datasets/create/input/__init__.py +43 -63
  8. anemoi/datasets/create/input/action.py +296 -236
  9. anemoi/datasets/create/input/context/__init__.py +71 -0
  10. anemoi/datasets/create/input/context/field.py +54 -0
  11. anemoi/datasets/create/input/data_sources.py +2 -1
  12. anemoi/datasets/create/input/misc.py +0 -71
  13. anemoi/datasets/create/input/repeated_dates.py +0 -114
  14. anemoi/datasets/create/input/result/__init__.py +17 -0
  15. anemoi/datasets/create/input/{result.py → result/field.py} +9 -89
  16. anemoi/datasets/create/sources/accumulations.py +74 -94
  17. anemoi/datasets/create/sources/accumulations2.py +16 -45
  18. anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
  19. anemoi/datasets/create/sources/constants.py +39 -38
  20. anemoi/datasets/create/sources/empty.py +26 -22
  21. anemoi/datasets/create/sources/forcings.py +29 -28
  22. anemoi/datasets/create/sources/grib.py +92 -72
  23. anemoi/datasets/create/sources/grib_index.py +46 -42
  24. anemoi/datasets/create/sources/hindcasts.py +56 -55
  25. anemoi/datasets/create/sources/legacy.py +10 -62
  26. anemoi/datasets/create/sources/mars.py +107 -131
  27. anemoi/datasets/create/sources/netcdf.py +28 -24
  28. anemoi/datasets/create/sources/opendap.py +28 -24
  29. anemoi/datasets/create/sources/recentre.py +42 -41
  30. anemoi/datasets/create/sources/repeated_dates.py +44 -0
  31. anemoi/datasets/create/sources/source.py +26 -48
  32. anemoi/datasets/create/sources/tendencies.py +67 -94
  33. anemoi/datasets/create/sources/xarray_support/__init__.py +29 -24
  34. anemoi/datasets/create/sources/xarray_support/field.py +4 -4
  35. anemoi/datasets/create/sources/xarray_zarr.py +28 -24
  36. anemoi/datasets/create/sources/zenodo.py +43 -39
  37. anemoi/datasets/create/utils.py +0 -42
  38. anemoi/datasets/data/dataset.py +6 -0
  39. anemoi/datasets/data/grids.py +0 -152
  40. anemoi/datasets/data/rolling_average.py +141 -0
  41. anemoi/datasets/data/stores.py +7 -9
  42. anemoi/datasets/dates/__init__.py +2 -0
  43. anemoi/datasets/dumper.py +76 -0
  44. anemoi/datasets/grids.py +1 -178
  45. anemoi/datasets/schemas/recipe.json +131 -0
  46. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +5 -2
  47. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/RECORD +51 -51
  48. anemoi/datasets/create/filter.py +0 -47
  49. anemoi/datasets/create/input/concat.py +0 -161
  50. anemoi/datasets/create/input/context.py +0 -86
  51. anemoi/datasets/create/input/empty.py +0 -53
  52. anemoi/datasets/create/input/filter.py +0 -117
  53. anemoi/datasets/create/input/function.py +0 -232
  54. anemoi/datasets/create/input/join.py +0 -129
  55. anemoi/datasets/create/input/pipe.py +0 -66
  56. anemoi/datasets/create/input/step.py +0 -173
  57. anemoi/datasets/create/input/template.py +0 -161
  58. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
  59. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
  60. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
  61. {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.5.27'
32
- __version_tuple__ = version_tuple = (0, 5, 27)
31
+ __version__ = version = '0.5.28'
32
+ __version_tuple__ = version_tuple = (0, 5, 28)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -0,0 +1,93 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ import argparse
12
+ import logging
13
+ import sys
14
+ from typing import Any
15
+
16
+ import yaml
17
+
18
+ from anemoi.datasets.create import validate_config
19
+
20
+ from .. import Command
21
+ from .format import format_recipe
22
+ from .migrate import migrate_recipe
23
+
24
+ LOG = logging.getLogger(__name__)
25
+
26
+
27
+ class Recipe(Command):
28
+ def add_arguments(self, command_parser: Any) -> None:
29
+ """Add arguments to the command parser.
30
+
31
+ Parameters
32
+ ----------
33
+ command_parser : Any
34
+ Command parser object.
35
+ """
36
+
37
+ command_parser.add_argument("--validate", action="store_true", help="Validate recipe.")
38
+ command_parser.add_argument("--format", action="store_true", help="Format the recipe.")
39
+ command_parser.add_argument("--migrate", action="store_true", help="Migrate the recipe to the latest version.")
40
+
41
+ group = command_parser.add_mutually_exclusive_group()
42
+ group.add_argument("--inplace", action="store_true", help="Overwrite the recipe file in place.")
43
+ group.add_argument("--output", type=str, help="Output file path for the converted recipe.")
44
+
45
+ command_parser.add_argument(
46
+ "path",
47
+ help="Path to recipe.",
48
+ )
49
+
50
+ def run(self, args: Any) -> None:
51
+
52
+ if not args.validate and not args.format and not args.migrate:
53
+ args.validate = True
54
+
55
+ with open(args.path) as file:
56
+ config = yaml.safe_load(file)
57
+
58
+ assert isinstance(config, dict)
59
+
60
+ if args.validate:
61
+ if args.inplace and (not args.format and not args.migrate):
62
+ argparse.ArgumentError(None, "--inplace is not supported with --validate.")
63
+
64
+ if args.output and (not args.format and not args.migrate):
65
+ argparse.ArgumentError(None, "--output is not supported with --validate.")
66
+
67
+ validate_config(config)
68
+ LOG.info(f"{args.path}: Recipe is valid.")
69
+ return
70
+
71
+ if args.migrate:
72
+ config = migrate_recipe(args, config)
73
+ if config is None:
74
+ LOG.info(f"{args.path}: No changes needed.")
75
+ return
76
+
77
+ args.format = True
78
+
79
+ if args.format:
80
+ formatted = format_recipe(args, config)
81
+ assert "dates" in formatted
82
+ f = sys.stdout
83
+ if args.output:
84
+ f = open(args.output, "w")
85
+
86
+ if args.inplace:
87
+ f = open(args.path, "w")
88
+
89
+ print(formatted, file=f)
90
+ f.close()
91
+
92
+
93
+ command = Recipe
@@ -0,0 +1,55 @@
1
+ # (C) Copyright 2025 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ import datetime
12
+ import logging
13
+
14
+ from ...dumper import yaml_dump
15
+
16
+ LOG = logging.getLogger(__name__)
17
+
18
+
19
+ def make_dates(config):
20
+ if isinstance(config, dict):
21
+ return {k: make_dates(v) for k, v in config.items()}
22
+ if isinstance(config, list):
23
+ return [make_dates(v) for v in config]
24
+ if isinstance(config, str):
25
+ try:
26
+ return datetime.datetime.fromisoformat(config)
27
+ except ValueError:
28
+ return config
29
+ return config
30
+
31
+
32
+ ORDER = (
33
+ "name",
34
+ "description",
35
+ "dataset_status",
36
+ "licence",
37
+ "attribution",
38
+ "env",
39
+ "dates",
40
+ "common",
41
+ "data_sources",
42
+ "input",
43
+ "output",
44
+ "statistics",
45
+ "build",
46
+ "platform",
47
+ )
48
+
49
+
50
+ def format_recipe(args, config: dict) -> str:
51
+
52
+ config = make_dates(config)
53
+ assert config
54
+
55
+ return yaml_dump(config, order=ORDER)
@@ -0,0 +1,555 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ import logging
12
+ import sys
13
+ from collections.abc import Sequence
14
+ from typing import Any
15
+
16
+ from glom import assign
17
+ from glom import delete
18
+ from glom import glom
19
+
20
+ from anemoi.datasets.create import validate_config
21
+ from anemoi.datasets.dumper import yaml_dump
22
+
23
+ LOG = logging.getLogger(__name__)
24
+
25
+
26
+ def find_paths(data, target_key=None, target_value=None, *path):
27
+
28
+ matches = []
29
+
30
+ if isinstance(data, dict):
31
+ for k, v in data.items():
32
+ if (target_key is not None and k == target_key) or (target_value is not None and v == target_value):
33
+ matches.append(list(path) + [k])
34
+ matches.extend(find_paths(v, target_key, target_value, *path, k))
35
+ elif isinstance(data, Sequence) and not isinstance(data, (str, bytes)):
36
+ for i, item in enumerate(data):
37
+ matches.extend(find_paths(item, target_key, target_value, *path, str(i)))
38
+ return matches
39
+
40
+
41
+ def find_chevrons(data, *path):
42
+
43
+ matches = []
44
+
45
+ if isinstance(data, dict):
46
+ for k, v in data.items():
47
+ if k == "<<":
48
+ matches.append(list(path) + [k])
49
+ matches.extend(find_chevrons(v, *path, k))
50
+ elif isinstance(data, list):
51
+ for i, item in enumerate(data):
52
+ matches.extend(find_chevrons(item, *path, str(i)))
53
+ return matches
54
+
55
+
56
+ def find_paths_in_substrees(path, obj, cur_path=None):
57
+ if cur_path is None:
58
+ cur_path = []
59
+ matches = []
60
+ try:
61
+ glom(obj, path) # just to check existence
62
+ matches.append(cur_path + path.split("."))
63
+ except Exception:
64
+ pass
65
+
66
+ if isinstance(obj, dict):
67
+ for k, v in obj.items():
68
+ matches.extend(find_paths_in_substrees(path, v, cur_path + [k]))
69
+ elif isinstance(obj, list):
70
+ for i, v in enumerate(obj):
71
+ matches.extend(find_paths_in_substrees(path, v, cur_path + [str(i)]))
72
+ return matches
73
+
74
+
75
+ MIGRATE = {
76
+ "output.statistics_end": "statistics.end",
77
+ "has_nans": "statistics.allow_nans",
78
+ "loop.dates.group_by": "build.group_by",
79
+ "loop.0.dates.group_by": "build.group_by",
80
+ "loop.dates": "dates",
81
+ "loop.0.dates": "dates",
82
+ "copyright": "attribution",
83
+ "dates.<<": "dates",
84
+ "options.group_by": "build.group_by",
85
+ "loops.0.loop_a.dates": "dates",
86
+ "loop.0.loop_a.dates": "dates",
87
+ "dates.stop": "dates.end",
88
+ "dates.group_by": "build.group_by",
89
+ "include.mars": "data_sources.mars.mars",
90
+ "ensemble_dimension": "build.ensemble_dimension",
91
+ "flatten_grid": "build.flatten_grid",
92
+ }
93
+
94
+ DELETE = [
95
+ "purpose",
96
+ # "input.join.0.label",
97
+ "status",
98
+ "common",
99
+ "config_format_version",
100
+ "aliases",
101
+ # "platform",
102
+ "loops.0.loop_a.applies_to",
103
+ "loop.0.loop_a.applies_to",
104
+ "dataset_status",
105
+ "alias",
106
+ "resources",
107
+ "input.dates.<<",
108
+ "input.dates.join.0.label.name",
109
+ ]
110
+
111
+
112
+ SOURCES = {
113
+ "oper-accumulations": "accumulations",
114
+ "era5-accumulations": "accumulations",
115
+ "ensemble-perturbations": "recentre",
116
+ "ensemble_perturbations": "recentre",
117
+ "perturbations": "recentre",
118
+ "custom-regrid": "regrid",
119
+ }
120
+
121
+ MARKER = object()
122
+
123
+
124
+ def _delete(config, path):
125
+ x = glom(config, path, default=MARKER)
126
+ if x is MARKER:
127
+ return
128
+ delete(config, path)
129
+
130
+
131
+ def _move(config, path, new_path, result):
132
+ x = glom(config, path, default=MARKER)
133
+ if x is MARKER:
134
+ return
135
+ delete(result, path)
136
+ assign(result, new_path, x, missing=dict)
137
+
138
+
139
+ def _fix_input_0(config):
140
+ if isinstance(config["input"], dict):
141
+ return
142
+
143
+ input = config["input"]
144
+ new_input = []
145
+
146
+ blocks = {}
147
+ first = None
148
+ for block in input:
149
+ assert isinstance(block, dict), block
150
+
151
+ assert len(block) == 1, block
152
+
153
+ block_name, values = list(block.items())[0]
154
+
155
+ if "kwargs" in values:
156
+ inherit = values.pop("inherit", None)
157
+ assert len(values) == 1, values
158
+ values = values["kwargs"]
159
+ values.pop("date", None)
160
+ source_name = values.pop("name", None)
161
+
162
+ if inherit is not None:
163
+ if inherit.startswith("$"):
164
+ inherit = inherit[1:]
165
+ inherited = blocks[inherit].copy()
166
+ inherited.update(values)
167
+ values = inherited
168
+
169
+ if first is None:
170
+ first = source_name
171
+
172
+ blocks[block_name] = values.copy()
173
+
174
+ new_input.append({SOURCES.get(source_name, source_name): values.copy()})
175
+ else:
176
+ assert False, f"Block {block_name} does not have 'kwargs': {values}"
177
+
178
+ blocks[block_name] = values.copy()
179
+
180
+ config["input"] = dict(join=new_input)
181
+
182
+
183
+ def _fix_input_1(result, config):
184
+ if isinstance(config["input"], dict):
185
+ return
186
+
187
+ input = config["input"]
188
+ join = []
189
+ for k in input:
190
+ assert isinstance(k, dict)
191
+ assert len(k) == 1, f"Input key {k} is not a string: {input}"
192
+ name, values = list(k.items())[0]
193
+ join.append(values)
194
+
195
+ result["input"] = {"join": join}
196
+ config["input"] = result["input"].copy()
197
+
198
+
199
+ def remove_empties(config: dict) -> None:
200
+ """Remove empty dictionaries and lists from the config."""
201
+ if isinstance(config, dict):
202
+ keys_to_delete = [k for k, v in config.items() if v in (None, {}, [], [{}])]
203
+
204
+ for k in keys_to_delete:
205
+ del config[k]
206
+
207
+ for k, v in config.items():
208
+ remove_empties(v)
209
+
210
+ if isinstance(config, list):
211
+ for item in config:
212
+ remove_empties(item)
213
+
214
+
215
+ def _fix_loops(result: dict, config: dict) -> None:
216
+ if "loops" not in config:
217
+ return
218
+
219
+ input = config["input"]
220
+ loops = config["loops"]
221
+
222
+ assert isinstance(loops, list), loops
223
+ assert isinstance(input, list), input
224
+
225
+ entries = {}
226
+ dates_block = None
227
+ for loop in loops:
228
+ assert isinstance(loop, dict), loop
229
+ assert len(loop) == 1, loop
230
+ loop = list(loop.values())[0]
231
+ applies_to = loop["applies_to"]
232
+ dates = loop["dates"]
233
+ assert isinstance(applies_to, list), (applies_to, loop)
234
+ for a in applies_to:
235
+ entries[a] = dates.copy()
236
+
237
+ if "start" in dates:
238
+ start = dates["start"]
239
+ else:
240
+ start = max(dates["values"])
241
+
242
+ if "end" in dates or "stop" in dates:
243
+ end = dates.get("end", dates.get("stop"))
244
+ else:
245
+ end = min(dates["values"])
246
+
247
+ if dates_block is None:
248
+ dates_block = {
249
+ "start": start,
250
+ "end": end,
251
+ }
252
+
253
+ if "frequency" in dates:
254
+ if "frequency" not in dates_block:
255
+ dates_block["frequency"] = dates["frequency"]
256
+ else:
257
+ assert dates_block["frequency"] == dates["frequency"], (dates_block["frequency"], dates["frequency"])
258
+
259
+ dates_block["start"] = min(dates_block["start"], start)
260
+ dates_block["end"] = max(dates_block["end"], end)
261
+
262
+ concat = []
263
+ result["input"] = {"concat": concat}
264
+
265
+ print("Found loops:", entries)
266
+
267
+ for block in input:
268
+ assert isinstance(block, dict), block
269
+ assert len(block) == 1, block
270
+ name, values = list(block.items())[0]
271
+ assert name in entries, f"Loop {name} not found in loops: {list(entries.keys())}"
272
+ dates = entries[name].copy()
273
+
274
+ assert "kwargs" not in values
275
+
276
+ concat.append(dict(dates=dates, **values))
277
+
278
+ d = concat[0]["dates"]
279
+ if all(c["dates"] == d for c in concat):
280
+ join = []
281
+ for c in concat:
282
+ del c["dates"]
283
+ join.append(c)
284
+ result["input"] = {"join": join}
285
+
286
+ del config["loops"]
287
+ config["input"] = result["input"].copy()
288
+ config["dates"] = dates_block.copy()
289
+ del result["loops"]
290
+ result["dates"] = dates_block
291
+
292
+
293
+ def _fix_other(result: dict, config: dict) -> None:
294
+ paths = find_paths(config, target_key="source_or_dataset", target_value="$previous_data")
295
+ for p in paths:
296
+ print(f"Fixing {'.'.join(p)}")
297
+ assign(result, ".".join(p[:-1] + ["template"]), "${input.join.0.mars}", missing=dict)
298
+ delete(result, ".".join(p))
299
+
300
+ paths = find_paths(config, target_key="date", target_value="$dates")
301
+ for p in paths:
302
+ delete(result, ".".join(p))
303
+
304
+
305
+ def _fix_join(result: dict, config: dict) -> None:
306
+ print("Fixing join...")
307
+ input = config["input"]
308
+ if "dates" in input and "join" in input["dates"]:
309
+ result["input"]["join"] = input["dates"]["join"]
310
+ config["input"]["join"] = input["dates"]["join"].copy()
311
+
312
+ if "join" not in input:
313
+ return
314
+
315
+ join = input["join"]
316
+ new_join = []
317
+ for j in join:
318
+ assert isinstance(j, dict)
319
+ assert len(j) == 1
320
+
321
+ key, values = list(j.items())[0]
322
+
323
+ if key not in ("label", "source"):
324
+ return
325
+
326
+ assert isinstance(values, dict), f"Join values for {key} should be a dict: {values}"
327
+ if key == "label":
328
+ j = values
329
+ j.pop("name")
330
+ key, values = list(j.items())[0]
331
+
332
+ print(values)
333
+ source_name = values.pop("name", "mars")
334
+ new_join.append(
335
+ {
336
+ SOURCES.get(source_name, source_name): values,
337
+ }
338
+ )
339
+
340
+ result["input"] = {"join": new_join}
341
+ config["input"] = result["input"].copy()
342
+
343
+
344
+ def _fix_sources(config: dict, what) -> None:
345
+
346
+ input = config["input"]
347
+ if what not in input:
348
+ return
349
+
350
+ join = input[what]
351
+ new_join = []
352
+ for j in join:
353
+ assert isinstance(j, dict)
354
+ assert len(j) == 1, j
355
+
356
+ key, values = list(j.items())[0]
357
+
358
+ key = SOURCES.get(key, key)
359
+
360
+ new_join.append(
361
+ {
362
+ key: values,
363
+ }
364
+ )
365
+
366
+ config["input"][what] = new_join
367
+ config["input"][what] = new_join.copy()
368
+
369
+
370
+ def _assign(config, path, value):
371
+ print(f"Assign {path} {value}")
372
+ assign(config, path, value)
373
+
374
+
375
+ def _fix_chevrons(result: dict, config: dict) -> None:
376
+ print("Fixing chevrons...")
377
+ paths = find_chevrons(config)
378
+ for p in paths:
379
+ a = glom(config, ".".join(p))
380
+ b = glom(config, ".".join(p[:-1]))
381
+ delete(result, ".".join(p))
382
+ a.update(b)
383
+ assign(result, ".".join(p[:-1]), a)
384
+
385
+
386
+ def _fix_some(config: dict) -> None:
387
+
388
+ paths = find_paths_in_substrees("label.function", config)
389
+ for p in paths:
390
+ parent = glom(config, ".".join(p[:-2]))
391
+ node = glom(config, ".".join(p[:-1]))
392
+ assert node
393
+ _assign(config, ".".join(p[:-2]), node)
394
+
395
+ paths = find_paths_in_substrees("constants.source_or_dataset", config)
396
+ for p in paths:
397
+ node = glom(config, ".".join(p[:-1]))
398
+ node["template"] = node.pop("source_or_dataset")
399
+ if node["template"] == "$previous_data":
400
+ node["template"] = "${input.join.0.mars}"
401
+ paths = find_paths_in_substrees("constants.template", config)
402
+ for p in paths:
403
+ node = glom(config, ".".join(p[:-1]))
404
+ if node["template"] == "$pl_data":
405
+ node["template"] = "${input.join.0.mars}"
406
+ for d in ("date", "dates", "time"):
407
+ paths = find_paths_in_substrees(d, config)
408
+ for p in paths:
409
+ if len(p) > 1:
410
+ node = glom(config, ".".join(p[:-1]))
411
+ if isinstance(node, dict) and isinstance(node[d], str) and node[d].startswith("$"):
412
+ del node[d]
413
+
414
+ paths = find_paths_in_substrees("source.<<", config)
415
+ for p in paths:
416
+ parent = glom(config, ".".join(p[:-2]))
417
+ node = glom(config, ".".join(p[:-1]))
418
+ node.update(node.pop("<<"))
419
+ parent[node.pop("name")] = node
420
+ assert len(parent) == 2
421
+ del parent["source"]
422
+
423
+ paths = find_paths_in_substrees("label.mars", config)
424
+ for p in paths:
425
+ parent = glom(config, ".".join(p[:-2]))
426
+ node = glom(config, ".".join(p[:-1]))
427
+ assert node
428
+ assign(config, ".".join(p[:-2]), node)
429
+
430
+ paths = find_paths_in_substrees("input.dates.join", config)
431
+ for p in paths:
432
+ node = glom(config, ".".join(p))
433
+ config["input"]["join"] = node
434
+ del config["input"]["dates"]
435
+
436
+ paths = find_paths_in_substrees("source.name", config)
437
+ for p in paths:
438
+ parent = glom(config, ".".join(p[:-2]))
439
+ node = glom(config, ".".join(p[:-1]))
440
+ name = node.pop("name")
441
+ assign(config, ".".join(p[:-2]), {name: node})
442
+
443
+ paths = find_paths_in_substrees("function.name", config)
444
+ for p in paths:
445
+ parent = glom(config, ".".join(p[:-2]))
446
+ node = glom(config, ".".join(p[:-1]))
447
+ name = node.pop("name")
448
+ assert node
449
+ assign(config, ".".join(p[:-2]), {name: node})
450
+
451
+
452
+ def _migrate(config: dict, n) -> dict:
453
+
454
+ result = config.copy()
455
+
456
+ _fix_input_0(result)
457
+ # _fix_loops(result, config)
458
+ # _fix_input_1(result, config)
459
+ # _fix_join(result, config)
460
+ # _fix_chevrons(result, config)
461
+ # _fix_other(result, config)
462
+
463
+ for k, v in MIGRATE.items():
464
+ _move(config, k, v, result)
465
+
466
+ _fix_some(result)
467
+ _fix_sources(result, "join")
468
+
469
+ for k in DELETE:
470
+ _delete(result, k)
471
+
472
+ remove_empties(result)
473
+
474
+ return result
475
+
476
+
477
+ def migrate(old: dict) -> dict:
478
+
479
+ for i in range(10):
480
+ new = _migrate(old, i)
481
+ if new == old:
482
+ return new
483
+ old = new
484
+
485
+ return new
486
+
487
+
488
+ def has_key(config, key: str) -> bool:
489
+ if isinstance(config, dict):
490
+ if key in config:
491
+ return True
492
+ for k, v in config.items():
493
+ if has_key(v, key):
494
+ return True
495
+ if isinstance(config, list):
496
+ for item in config:
497
+ if has_key(item, key):
498
+ return True
499
+ return False
500
+
501
+
502
+ def has_value(config, value: str) -> bool:
503
+ if isinstance(config, dict):
504
+ for k, v in config.items():
505
+ if v == value:
506
+ return True
507
+ if has_value(v, value):
508
+ return True
509
+
510
+ if isinstance(config, list):
511
+ for item in config:
512
+ if item == value:
513
+ return True
514
+ if has_value(item, value):
515
+ return True
516
+ return config == value
517
+
518
+
519
+ def check(config):
520
+
521
+ try:
522
+
523
+ validate_config(config)
524
+ assert config.get("input", {})
525
+ assert config.get("dates", {})
526
+ assert not has_key(config, "label")
527
+ assert not has_key(config, "kwargs")
528
+ assert not has_value(config, "$previous_data")
529
+ assert not has_value(config, "$pl_data")
530
+ assert not has_value(config, "$dates")
531
+ assert not has_key(config, "inherit")
532
+ assert not has_key(config, "source_or_dataset")
533
+ assert not has_key(config, "<<")
534
+
535
+ for n in SOURCES.keys():
536
+ assert not has_key(config, n), f"Source {n} found in config. Please update to {SOURCES[n]}."
537
+
538
+ except Exception as e:
539
+ print("Validation failed:")
540
+ print(e)
541
+ print(yaml_dump(config))
542
+ sys.exit(1)
543
+
544
+
545
+ def migrate_recipe(args: Any, config) -> None:
546
+
547
+ print(f"Migrating {args.path}")
548
+
549
+ migrated = migrate(config)
550
+
551
+ check(migrated)
552
+ if migrated == config:
553
+ return None
554
+
555
+ return migrated