anemoi-datasets 0.5.27__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/recipe/__init__.py +93 -0
- anemoi/datasets/commands/recipe/format.py +55 -0
- anemoi/datasets/commands/recipe/migrate.py +555 -0
- anemoi/datasets/create/__init__.py +42 -1
- anemoi/datasets/create/config.py +2 -0
- anemoi/datasets/create/input/__init__.py +43 -63
- anemoi/datasets/create/input/action.py +296 -236
- anemoi/datasets/create/input/context/__init__.py +71 -0
- anemoi/datasets/create/input/context/field.py +54 -0
- anemoi/datasets/create/input/data_sources.py +2 -1
- anemoi/datasets/create/input/misc.py +0 -71
- anemoi/datasets/create/input/repeated_dates.py +0 -114
- anemoi/datasets/create/input/result/__init__.py +17 -0
- anemoi/datasets/create/input/{result.py → result/field.py} +9 -89
- anemoi/datasets/create/sources/accumulations.py +74 -94
- anemoi/datasets/create/sources/accumulations2.py +16 -45
- anemoi/datasets/create/sources/anemoi_dataset.py +46 -42
- anemoi/datasets/create/sources/constants.py +39 -38
- anemoi/datasets/create/sources/empty.py +26 -22
- anemoi/datasets/create/sources/forcings.py +29 -28
- anemoi/datasets/create/sources/grib.py +92 -72
- anemoi/datasets/create/sources/grib_index.py +46 -42
- anemoi/datasets/create/sources/hindcasts.py +56 -55
- anemoi/datasets/create/sources/legacy.py +10 -62
- anemoi/datasets/create/sources/mars.py +107 -131
- anemoi/datasets/create/sources/netcdf.py +28 -24
- anemoi/datasets/create/sources/opendap.py +28 -24
- anemoi/datasets/create/sources/recentre.py +42 -41
- anemoi/datasets/create/sources/repeated_dates.py +44 -0
- anemoi/datasets/create/sources/source.py +26 -48
- anemoi/datasets/create/sources/tendencies.py +67 -94
- anemoi/datasets/create/sources/xarray_support/__init__.py +29 -24
- anemoi/datasets/create/sources/xarray_support/field.py +4 -4
- anemoi/datasets/create/sources/xarray_zarr.py +28 -24
- anemoi/datasets/create/sources/zenodo.py +43 -39
- anemoi/datasets/create/utils.py +0 -42
- anemoi/datasets/data/dataset.py +6 -0
- anemoi/datasets/data/grids.py +0 -152
- anemoi/datasets/data/rolling_average.py +141 -0
- anemoi/datasets/data/stores.py +7 -9
- anemoi/datasets/dates/__init__.py +2 -0
- anemoi/datasets/dumper.py +76 -0
- anemoi/datasets/grids.py +1 -178
- anemoi/datasets/schemas/recipe.json +131 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/METADATA +5 -2
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/RECORD +51 -51
- anemoi/datasets/create/filter.py +0 -47
- anemoi/datasets/create/input/concat.py +0 -161
- anemoi/datasets/create/input/context.py +0 -86
- anemoi/datasets/create/input/empty.py +0 -53
- anemoi/datasets/create/input/filter.py +0 -117
- anemoi/datasets/create/input/function.py +0 -232
- anemoi/datasets/create/input/join.py +0 -129
- anemoi/datasets/create/input/pipe.py +0 -66
- anemoi/datasets/create/input/step.py +0 -173
- anemoi/datasets/create/input/template.py +0 -161
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/WHEEL +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.27.dist-info → anemoi_datasets-0.5.28.dist-info}/top_level.txt +0 -0
anemoi/datasets/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.5.27'
-__version_tuple__ = version_tuple = (0, 5, 27)
+__version__ = version = '0.5.28'
+__version_tuple__ = version_tuple = (0, 5, 28)
 
 __commit_id__ = commit_id = None
anemoi/datasets/commands/recipe/__init__.py
ADDED
@@ -0,0 +1,93 @@
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.


import argparse
import logging
import sys
from typing import Any

import yaml

from anemoi.datasets.create import validate_config

from .. import Command
from .format import format_recipe
from .migrate import migrate_recipe

LOG = logging.getLogger(__name__)


class Recipe(Command):
    def add_arguments(self, command_parser: Any) -> None:
        """Add arguments to the command parser.

        Parameters
        ----------
        command_parser : Any
            Command parser object.
        """

        command_parser.add_argument("--validate", action="store_true", help="Validate recipe.")
        command_parser.add_argument("--format", action="store_true", help="Format the recipe.")
        command_parser.add_argument("--migrate", action="store_true", help="Migrate the recipe to the latest version.")

        group = command_parser.add_mutually_exclusive_group()
        group.add_argument("--inplace", action="store_true", help="Overwrite the recipe file in place.")
        group.add_argument("--output", type=str, help="Output file path for the converted recipe.")

        command_parser.add_argument(
            "path",
            help="Path to recipe.",
        )

    def run(self, args: Any) -> None:

        if not args.validate and not args.format and not args.migrate:
            args.validate = True

        with open(args.path) as file:
            config = yaml.safe_load(file)

        assert isinstance(config, dict)

        if args.validate:
            if args.inplace and (not args.format and not args.migrate):
                argparse.ArgumentError(None, "--inplace is not supported with --validate.")

            if args.output and (not args.format and not args.migrate):
                argparse.ArgumentError(None, "--output is not supported with --validate.")

            validate_config(config)
            LOG.info(f"{args.path}: Recipe is valid.")
            return

        if args.migrate:
            config = migrate_recipe(args, config)
            if config is None:
                LOG.info(f"{args.path}: No changes needed.")
                return

            args.format = True

        if args.format:
            formatted = format_recipe(args, config)
            assert "dates" in formatted
            f = sys.stdout
            if args.output:
                f = open(args.output, "w")

            if args.inplace:
                f = open(args.path, "w")

            print(formatted, file=f)
            f.close()


command = Recipe
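For context, the --validate path of the new recipe command boils down to loading the YAML file and handing it to validate_config, exactly as in the run() method above. A minimal sketch of that flow, with a hypothetical recipe path:

import yaml

from anemoi.datasets.create import validate_config

# Load a recipe (the file name here is only an example).
with open("my-recipe.yaml") as f:
    config = yaml.safe_load(f)

# Raises if the recipe is not valid (presumably checked against the
# recipe.json schema added elsewhere in this release).
validate_config(config)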
anemoi/datasets/commands/recipe/format.py
ADDED
@@ -0,0 +1,55 @@
# (C) Copyright 2025 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.


import datetime
import logging

from ...dumper import yaml_dump

LOG = logging.getLogger(__name__)


def make_dates(config):
    if isinstance(config, dict):
        return {k: make_dates(v) for k, v in config.items()}
    if isinstance(config, list):
        return [make_dates(v) for v in config]
    if isinstance(config, str):
        try:
            return datetime.datetime.fromisoformat(config)
        except ValueError:
            return config
    return config


ORDER = (
    "name",
    "description",
    "dataset_status",
    "licence",
    "attribution",
    "env",
    "dates",
    "common",
    "data_sources",
    "input",
    "output",
    "statistics",
    "build",
    "platform",
)


def format_recipe(args, config: dict) -> str:

    config = make_dates(config)
    assert config

    return yaml_dump(config, order=ORDER)
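The formatter's make_dates helper walks the whole recipe and converts any string that parses as an ISO-8601 date into a datetime object before dumping, leaving everything else untouched. A small illustration, assuming the module is importable from the installed package; the recipe fragment is made up:

from anemoi.datasets.commands.recipe.format import make_dates

fragment = {"dates": {"start": "2020-01-01", "end": "2021-01-01", "frequency": "6h"}}

# "start" and "end" become datetime objects; "6h" does not parse and stays a string.
print(make_dates(fragment))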
anemoi/datasets/commands/recipe/migrate.py
ADDED
@@ -0,0 +1,555 @@
# (C) Copyright 2024 Anemoi contributors.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
#
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.


import logging
import sys
from collections.abc import Sequence
from typing import Any

from glom import assign
from glom import delete
from glom import glom

from anemoi.datasets.create import validate_config
from anemoi.datasets.dumper import yaml_dump

LOG = logging.getLogger(__name__)


def find_paths(data, target_key=None, target_value=None, *path):

    matches = []

    if isinstance(data, dict):
        for k, v in data.items():
            if (target_key is not None and k == target_key) or (target_value is not None and v == target_value):
                matches.append(list(path) + [k])
            matches.extend(find_paths(v, target_key, target_value, *path, k))
    elif isinstance(data, Sequence) and not isinstance(data, (str, bytes)):
        for i, item in enumerate(data):
            matches.extend(find_paths(item, target_key, target_value, *path, str(i)))
    return matches


def find_chevrons(data, *path):

    matches = []

    if isinstance(data, dict):
        for k, v in data.items():
            if k == "<<":
                matches.append(list(path) + [k])
            matches.extend(find_chevrons(v, *path, k))
    elif isinstance(data, list):
        for i, item in enumerate(data):
            matches.extend(find_chevrons(item, *path, str(i)))
    return matches


def find_paths_in_substrees(path, obj, cur_path=None):
    if cur_path is None:
        cur_path = []
    matches = []
    try:
        glom(obj, path)  # just to check existence
        matches.append(cur_path + path.split("."))
    except Exception:
        pass

    if isinstance(obj, dict):
        for k, v in obj.items():
            matches.extend(find_paths_in_substrees(path, v, cur_path + [k]))
    elif isinstance(obj, list):
        for i, v in enumerate(obj):
            matches.extend(find_paths_in_substrees(path, v, cur_path + [str(i)]))
    return matches


MIGRATE = {
    "output.statistics_end": "statistics.end",
    "has_nans": "statistics.allow_nans",
    "loop.dates.group_by": "build.group_by",
    "loop.0.dates.group_by": "build.group_by",
    "loop.dates": "dates",
    "loop.0.dates": "dates",
    "copyright": "attribution",
    "dates.<<": "dates",
    "options.group_by": "build.group_by",
    "loops.0.loop_a.dates": "dates",
    "loop.0.loop_a.dates": "dates",
    "dates.stop": "dates.end",
    "dates.group_by": "build.group_by",
    "include.mars": "data_sources.mars.mars",
    "ensemble_dimension": "build.ensemble_dimension",
    "flatten_grid": "build.flatten_grid",
}

DELETE = [
    "purpose",
    # "input.join.0.label",
    "status",
    "common",
    "config_format_version",
    "aliases",
    # "platform",
    "loops.0.loop_a.applies_to",
    "loop.0.loop_a.applies_to",
    "dataset_status",
    "alias",
    "resources",
    "input.dates.<<",
    "input.dates.join.0.label.name",
]


SOURCES = {
    "oper-accumulations": "accumulations",
    "era5-accumulations": "accumulations",
    "ensemble-perturbations": "recentre",
    "ensemble_perturbations": "recentre",
    "perturbations": "recentre",
    "custom-regrid": "regrid",
}

MARKER = object()


def _delete(config, path):
    x = glom(config, path, default=MARKER)
    if x is MARKER:
        return
    delete(config, path)


def _move(config, path, new_path, result):
    x = glom(config, path, default=MARKER)
    if x is MARKER:
        return
    delete(result, path)
    assign(result, new_path, x, missing=dict)


def _fix_input_0(config):
    if isinstance(config["input"], dict):
        return

    input = config["input"]
    new_input = []

    blocks = {}
    first = None
    for block in input:
        assert isinstance(block, dict), block

        assert len(block) == 1, block

        block_name, values = list(block.items())[0]

        if "kwargs" in values:
            inherit = values.pop("inherit", None)
            assert len(values) == 1, values
            values = values["kwargs"]
            values.pop("date", None)
            source_name = values.pop("name", None)

            if inherit is not None:
                if inherit.startswith("$"):
                    inherit = inherit[1:]
                inherited = blocks[inherit].copy()
                inherited.update(values)
                values = inherited

            if first is None:
                first = source_name

            blocks[block_name] = values.copy()

            new_input.append({SOURCES.get(source_name, source_name): values.copy()})
        else:
            assert False, f"Block {block_name} does not have 'kwargs': {values}"

        blocks[block_name] = values.copy()

    config["input"] = dict(join=new_input)


def _fix_input_1(result, config):
    if isinstance(config["input"], dict):
        return

    input = config["input"]
    join = []
    for k in input:
        assert isinstance(k, dict)
        assert len(k) == 1, f"Input key {k} is not a string: {input}"
        name, values = list(k.items())[0]
        join.append(values)

    result["input"] = {"join": join}
    config["input"] = result["input"].copy()


def remove_empties(config: dict) -> None:
    """Remove empty dictionaries and lists from the config."""
    if isinstance(config, dict):
        keys_to_delete = [k for k, v in config.items() if v in (None, {}, [], [{}])]

        for k in keys_to_delete:
            del config[k]

        for k, v in config.items():
            remove_empties(v)

    if isinstance(config, list):
        for item in config:
            remove_empties(item)


def _fix_loops(result: dict, config: dict) -> None:
    if "loops" not in config:
        return

    input = config["input"]
    loops = config["loops"]

    assert isinstance(loops, list), loops
    assert isinstance(input, list), input

    entries = {}
    dates_block = None
    for loop in loops:
        assert isinstance(loop, dict), loop
        assert len(loop) == 1, loop
        loop = list(loop.values())[0]
        applies_to = loop["applies_to"]
        dates = loop["dates"]
        assert isinstance(applies_to, list), (applies_to, loop)
        for a in applies_to:
            entries[a] = dates.copy()

        if "start" in dates:
            start = dates["start"]
        else:
            start = max(dates["values"])

        if "end" in dates or "stop" in dates:
            end = dates.get("end", dates.get("stop"))
        else:
            end = min(dates["values"])

        if dates_block is None:
            dates_block = {
                "start": start,
                "end": end,
            }

        if "frequency" in dates:
            if "frequency" not in dates_block:
                dates_block["frequency"] = dates["frequency"]
            else:
                assert dates_block["frequency"] == dates["frequency"], (dates_block["frequency"], dates["frequency"])

        dates_block["start"] = min(dates_block["start"], start)
        dates_block["end"] = max(dates_block["end"], end)

    concat = []
    result["input"] = {"concat": concat}

    print("Found loops:", entries)

    for block in input:
        assert isinstance(block, dict), block
        assert len(block) == 1, block
        name, values = list(block.items())[0]
        assert name in entries, f"Loop {name} not found in loops: {list(entries.keys())}"
        dates = entries[name].copy()

        assert "kwargs" not in values

        concat.append(dict(dates=dates, **values))

    d = concat[0]["dates"]
    if all(c["dates"] == d for c in concat):
        join = []
        for c in concat:
            del c["dates"]
            join.append(c)
        result["input"] = {"join": join}

    del config["loops"]
    config["input"] = result["input"].copy()
    config["dates"] = dates_block.copy()
    del result["loops"]
    result["dates"] = dates_block


def _fix_other(result: dict, config: dict) -> None:
    paths = find_paths(config, target_key="source_or_dataset", target_value="$previous_data")
    for p in paths:
        print(f"Fixing {'.'.join(p)}")
        assign(result, ".".join(p[:-1] + ["template"]), "${input.join.0.mars}", missing=dict)
        delete(result, ".".join(p))

    paths = find_paths(config, target_key="date", target_value="$dates")
    for p in paths:
        delete(result, ".".join(p))


def _fix_join(result: dict, config: dict) -> None:
    print("Fixing join...")
    input = config["input"]
    if "dates" in input and "join" in input["dates"]:
        result["input"]["join"] = input["dates"]["join"]
        config["input"]["join"] = input["dates"]["join"].copy()

    if "join" not in input:
        return

    join = input["join"]
    new_join = []
    for j in join:
        assert isinstance(j, dict)
        assert len(j) == 1

        key, values = list(j.items())[0]

        if key not in ("label", "source"):
            return

        assert isinstance(values, dict), f"Join values for {key} should be a dict: {values}"
        if key == "label":
            j = values
            j.pop("name")
            key, values = list(j.items())[0]

        print(values)
        source_name = values.pop("name", "mars")
        new_join.append(
            {
                SOURCES.get(source_name, source_name): values,
            }
        )

    result["input"] = {"join": new_join}
    config["input"] = result["input"].copy()


def _fix_sources(config: dict, what) -> None:

    input = config["input"]
    if what not in input:
        return

    join = input[what]
    new_join = []
    for j in join:
        assert isinstance(j, dict)
        assert len(j) == 1, j

        key, values = list(j.items())[0]

        key = SOURCES.get(key, key)

        new_join.append(
            {
                key: values,
            }
        )

    config["input"][what] = new_join
    config["input"][what] = new_join.copy()


def _assign(config, path, value):
    print(f"Assign {path} {value}")
    assign(config, path, value)


def _fix_chevrons(result: dict, config: dict) -> None:
    print("Fixing chevrons...")
    paths = find_chevrons(config)
    for p in paths:
        a = glom(config, ".".join(p))
        b = glom(config, ".".join(p[:-1]))
        delete(result, ".".join(p))
        a.update(b)
        assign(result, ".".join(p[:-1]), a)


def _fix_some(config: dict) -> None:

    paths = find_paths_in_substrees("label.function", config)
    for p in paths:
        parent = glom(config, ".".join(p[:-2]))
        node = glom(config, ".".join(p[:-1]))
        assert node
        _assign(config, ".".join(p[:-2]), node)

    paths = find_paths_in_substrees("constants.source_or_dataset", config)
    for p in paths:
        node = glom(config, ".".join(p[:-1]))
        node["template"] = node.pop("source_or_dataset")
        if node["template"] == "$previous_data":
            node["template"] = "${input.join.0.mars}"
    paths = find_paths_in_substrees("constants.template", config)
    for p in paths:
        node = glom(config, ".".join(p[:-1]))
        if node["template"] == "$pl_data":
            node["template"] = "${input.join.0.mars}"
    for d in ("date", "dates", "time"):
        paths = find_paths_in_substrees(d, config)
        for p in paths:
            if len(p) > 1:
                node = glom(config, ".".join(p[:-1]))
                if isinstance(node, dict) and isinstance(node[d], str) and node[d].startswith("$"):
                    del node[d]

    paths = find_paths_in_substrees("source.<<", config)
    for p in paths:
        parent = glom(config, ".".join(p[:-2]))
        node = glom(config, ".".join(p[:-1]))
        node.update(node.pop("<<"))
        parent[node.pop("name")] = node
        assert len(parent) == 2
        del parent["source"]

    paths = find_paths_in_substrees("label.mars", config)
    for p in paths:
        parent = glom(config, ".".join(p[:-2]))
        node = glom(config, ".".join(p[:-1]))
        assert node
        assign(config, ".".join(p[:-2]), node)

    paths = find_paths_in_substrees("input.dates.join", config)
    for p in paths:
        node = glom(config, ".".join(p))
        config["input"]["join"] = node
        del config["input"]["dates"]

    paths = find_paths_in_substrees("source.name", config)
    for p in paths:
        parent = glom(config, ".".join(p[:-2]))
        node = glom(config, ".".join(p[:-1]))
        name = node.pop("name")
        assign(config, ".".join(p[:-2]), {name: node})

    paths = find_paths_in_substrees("function.name", config)
    for p in paths:
        parent = glom(config, ".".join(p[:-2]))
        node = glom(config, ".".join(p[:-1]))
        name = node.pop("name")
        assert node
        assign(config, ".".join(p[:-2]), {name: node})


def _migrate(config: dict, n) -> dict:

    result = config.copy()

    _fix_input_0(result)
    # _fix_loops(result, config)
    # _fix_input_1(result, config)
    # _fix_join(result, config)
    # _fix_chevrons(result, config)
    # _fix_other(result, config)

    for k, v in MIGRATE.items():
        _move(config, k, v, result)

    _fix_some(result)
    _fix_sources(result, "join")

    for k in DELETE:
        _delete(result, k)

    remove_empties(result)

    return result


def migrate(old: dict) -> dict:

    for i in range(10):
        new = _migrate(old, i)
        if new == old:
            return new
        old = new

    return new


def has_key(config, key: str) -> bool:
    if isinstance(config, dict):
        if key in config:
            return True
        for k, v in config.items():
            if has_key(v, key):
                return True
    if isinstance(config, list):
        for item in config:
            if has_key(item, key):
                return True
    return False


def has_value(config, value: str) -> bool:
    if isinstance(config, dict):
        for k, v in config.items():
            if v == value:
                return True
            if has_value(v, value):
                return True

    if isinstance(config, list):
        for item in config:
            if item == value:
                return True
            if has_value(item, value):
                return True
    return config == value


def check(config):

    try:

        validate_config(config)
        assert config.get("input", {})
        assert config.get("dates", {})
        assert not has_key(config, "label")
        assert not has_key(config, "kwargs")
        assert not has_value(config, "$previous_data")
        assert not has_value(config, "$pl_data")
        assert not has_value(config, "$dates")
        assert not has_key(config, "inherit")
        assert not has_key(config, "source_or_dataset")
        assert not has_key(config, "<<")

        for n in SOURCES.keys():
            assert not has_key(config, n), f"Source {n} found in config. Please update to {SOURCES[n]}."

    except Exception as e:
        print("Validation failed:")
        print(e)
        print(yaml_dump(config))
        sys.exit(1)


def migrate_recipe(args: Any, config) -> None:

    print(f"Migrating {args.path}")

    migrated = migrate(config)

    check(migrated)
    if migrated == config:
        return None

    return migrated
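Most of the mechanical renames in migrate.py are driven by the MIGRATE table together with the glom-based _move helper: read the value at the old dotted path, delete it, and re-assign it at the new path, creating intermediate dictionaries as needed. A minimal sketch of one such move (the "output.statistics_end" to "statistics.end" entry), using a made-up config:

from glom import assign, delete, glom

config = {"output": {"statistics_end": "2019-12-31", "order_by": ["valid_datetime"]}}

value = glom(config, "output.statistics_end")           # read the old location
delete(config, "output.statistics_end")                  # drop it
assign(config, "statistics.end", value, missing=dict)    # write the new location

print(config)
# {'output': {'order_by': ['valid_datetime']}, 'statistics': {'end': '2019-12-31'}}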